1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2025 Oxide Computer Company
14 */
15
16 /*
17 * AMD Zen Unified Memory Controller Driver
18 *
19 * This file forms the core logic around transforming a physical address that
20 * we're used to using into a specific location on a DIMM. This has support for
 * a wide range of AMD CPUs and APUs ranging from Zen 1 - Zen 5.
22 *
23 * The goal of this driver is to implement the infrastructure and support
24 * necessary to understand how DRAM requests are being routed in the system and
25 * to be able to map those to particular channels and then DIMMs. This is used
26 * as part of RAS (reliability, availability, and serviceability) to enable
27 * aspects around understanding ECC errors, hardware topology, and more. Like
28 * with any software project, there is more to do here. Please see the Future
29 * Work section at the end of this big theory statement for more information.
30 *
31 * -------------------
32 * Driver Organization
33 * -------------------
34 *
35 * This driver is organized into two major pieces:
36 *
37 * 1. Logic to interface with hardware, discover the data fabric, memory
38 * controller configuration, and transform that into a normalized fashion
39 * that can be used across all different Zen family CPUs. This is
40 * implemented generally in this file, and is designed to assume it is in
41 * the kernel (as it requires access to the SMN, DF PCI registers, and the
42 * amdzen nexus driver client services).
43 *
44 * 2. Logic that can take the above normalized memory information and perform
45 * decoding (e.g. physical address to DIMM information). This generally
 *    lives in common/mc/zen_umc/zen_umc_decode.c. This file is in common/,
47 * meaning it is designed to be shared by userland and the kernel. Even
48 * more so, it is designed to operate on a const version of our primary
49 * data structure (zen_umc_t), not allowing it to be modified. This allows
50 * us to more easily unit test the decoding logic and utilize it in other
51 * circumstances such as with the mcdecode utility.
52 *
53 * There is corresponding traditional dev_ops(9S) and cb_ops(9S) logic in the
54 * driver (currently this file) which take care of interfacing with the broader
55 * operating system environment.
56 *
 * There is only ever one instance of this driver, i.e. it is a singleton in
58 * design pattern parlance. There is a single struct, the zen_umc_t found in the
59 * global (albeit static) variable zen_umc. This structure itself contains a
60 * hierarchical set of structures that describe the system. To make management
61 * of memory simpler, all of the nested structures that we discover from
62 * hardware are allocated in the same structure. The only exception to this rule
63 * is when we cache serialized nvlists for dumping.
64 *
 * The organization of the structures inside the zen_umc_t generally mimics the
66 * hardware organization and is structured as follows:
67 *
68 * +-----------+
69 * | zen_umc_t |
70 * +-----------+
71 * |
72 * +-------------------------------+
73 * v v
74 * +--------------+ +--------------+ One instance of the
75 * | zen_umc_df_t | ... | zen_umc_df_t | zen_umc_df_t per
76 * +--------------+ +--------------+ discovered DF.
77 * |||
78 * |||
79 * ||| +----------------+ +----------------+ Global DRAM
80 * ||+--->| df_dram_rule_t | ... | df_dram_rule_t | rules for the
81 * || +----------------+ +----------------+ platform.
82 * ||
83 * || +--------------------+ +--------------------+ UMC remap
84 * |+--->| zen_umc_cs_remap_t | ... | zen_umc_cs_remap_t | rule arrays.
85 * | +--------------------+ +--------------------+
86 * |
87 * v
88 * +----------------+ +----------------+ One structure per
89 * | zen_umc_chan_t | ... | zen_umc_chan_t | discovered DDR4/5
90 * +----------------+ +----------------+ memory channel.
91 * ||||
92 * ||||
93 * |||| +----------------+ +----------------+ Channel specific
94 * |||+--->| df_dram_rule_t | ... | df_dram_rule_t | copy of DRAM rules.
95 * ||| +----------------+ +----------------+ Less than global.
96 * |||
97 * ||| +---------------+ +---------------+ Per-Channel DRAM
98 * ||+---->| chan_offset_t | ... | chan_offset_t | offset that is used
99 * || +---------------+ +---------------+ for normalization.
100 * ||
101 * || +-----------------+ Channel-specific
102 * |+----->| umc_chan_hash_t | hashing rules.
103 * | +-----------------+
104 * |
105 * | +------------+ +------------+ One structure for
106 * +------>| umc_dimm_t | ... | umc_dimm_t | each DIMM in the
107 * +------------+ +------------+ channel. Always two.
108 * |
109 * | +----------+ +----------+ Per chip-select
110 * +---> | umc_cs_t | ... | umc_cs_t | data. Always two.
111 * +----------+ +----------+
112 *
 * In the data structures themselves you'll often find several pieces of data
 * that have the term 'raw' in their name. The point of these is to capture the
 * original value that we read from the register before processing it. These
 * are generally used either for debugging or to help answer future curiosity
 * without resorting to the udf and usmn tooling, which hopefully aren't
 * actually installed on systems.
119 *
120 * With the exception of some of the members in the zen_umc_t that are around
121 * management of state for userland ioctls, everything in the structure is
122 * basically write-once and from that point on should be treated as read-only.
123 *
124 * ---------------
125 * Memory Decoding
126 * ---------------
127 *
128 * To understand the process of memory decoding, it's worth going through and
129 * understanding a bunch of the terminology that is used in this process. As an
130 * additional reference when understanding this, you may want to turn to either
131 * an older generation AMD BIOS and Kernel Developer's Guide or the more current
132 * Processor Programming Reference. In addition, the imc driver, which is the
133 * Intel equivalent, also provides an additional bit of reference.
134 *
135 * SYSTEM ADDRESS
136 *
137 * This is a physical address and is the way that the operating system
138 * normally thinks of memory. System addresses can refer to many different
139 * things. For example, you have traditional DRAM, memory-mapped PCIe
140 * devices, peripherals that the processor exposes such as the xAPIC, data
141 * from the FCH (Fusion Controller Hub), etc.
142 *
143 * TOM, TOM2, and the DRAM HOLE
144 *
145 * Physical memory has a complicated layout on x86 in part because of
146 * support for traditional 16-bit and 32-bit systems. As a result, contrary
147 * to popular belief, DRAM is not at a consistent address range in the
148 * processor. AMD processors have a few different ranges. There is a 32-bit
149 * region that starts at effectively physical address zero and goes to the
150 * TOM MSR (top of memory -- Core::X86::Msr::TOP_MEM). This indicates a
151 * limit below 4 GiB, generally around 2 GiB.
152 *
153 * From there, the next region of DRAM starts at 4 GiB and goes to TOM2
154 * (top of memory 2 -- Core::X86::Msr::TOM2). The region between TOM and
155 * 4 GiB is called the DRAM hole. Physical addresses in this region are
156 * used for memory mapped I/O. This breaks up contiguous physical
157 * addresses being used for DRAM, creating a "hole".
158 *
159 * DATA FABRIC
160 *
161 * The data fabric (DF) is the primary interface that different parts of
162 * the system use to communicate with one another. This includes the I/O
163 * engines (where PCIe traffic goes), CPU caches and their cores, memory
164 * channels, cross-socket communication, and a whole lot more. The first
165 * part of decoding addresses and figuring out which DRAM channel an
 *        address should be directed to comes from the data fabric.
167 *
 *        The data fabric is made up of instances. So there is one instance for
169 * each group of cores, each memory channel, etc. Each instance has its own
170 * independent set of register information. As the data fabric is a series
171 * of devices exposed over PCI, if you do a normal PCI configuration space
 *        read or write, that'll end up broadcasting the I/O. Instead, to access a
173 * particular instance's register information there is an indirect access
174 * mechanism. The primary way that this driver accesses data fabric
175 * registers is via these indirect reads.
176 *
177 * There is one instance of the Data Fabric per socket starting with Zen 2.
178 * In Zen 1, there was one instance of the data fabric per CCD -- core
179 * complex die (see cpuid.c's big theory statement for more information).
180 *
181 * DF INSTANCE ID
182 *
183 * A DF instance ID is an identifier for a single entity or component in a
 *        data fabric. The set of instance IDs is unique only within a single
 *        data fabric. So for example, each memory channel, I/O endpoint
 *        (e.g. PCIe logic), and group of cores has its own instance ID.
 *        Anything within the
187 * same data fabric (e.g. the same die) can be reached via its instance ID.
188 * The instance ID is used to indicate which instance to contact when
189 * performing indirect accesses.
190 *
191 * Not everything that has an instance ID will be globally routable (e.g.
192 * between multiple sockets). For things that are, such as the memory
193 * channels and coherent core initiators, there is a second ID called a
194 * fabric ID.
195 *
196 * DF FABRIC ID
197 *
198 * A DF fabric ID is an identifier that combines information to indicate
199 * both which instance of the data fabric a component is on and a component
200 * itself. So with this number you can distinguish between a memory channel
201 * on one of two sockets. A Fabric ID is made up of two parts. The upper
202 * part indicates which DF we are talking to and is referred to as a Node
203 * ID. The Node ID is itself broken into two parts: one that identifies a
204 * socket, and one that identifies a die. The lower part of a fabric ID is
205 * called a component ID and indicates which component in a particular data
206 * fabric that we are talking to. While only a subset of the total
207 * components in the data fabric are routable, for everything that is, its
208 * component ID matches its instance ID.
209 *
210 * Put differently, the component portion of a fabric ID and a component's
211 * instance ID are always the same for routable entities. For things which
212 * cannot be routed, they only have an instance ID and no fabric ID.
213 * Because this code is always interacting with data fabric components that
214 * are routable, sometimes instance ID and the component ID portion of the
215 * data fabric ID may be used interchangeably.
216 *
217 * Finally, it's worth calling out that the number of bits that are used to
218 * indicate the socket, die, and component in a fabric ID changes from
219 * hardware generation to hardware generation.
220 *
221 * Inside the code here, the socket and die decomposition information is
222 * always relative to the node ID. AMD phrases the decomposition
223 * information in terms of a series of masks and shifts. This is
224 * information that can be retrieved from the data fabric itself, allowing
225 * us to avoid hardcoding too much information other than which registers
226 * actually have which fields. With both masks and shifts, it's important
227 * to establish which comes first. We follow AMD's convention and always
228 * apply masks before shifts. With that, let's look at an example of a
229 * made up bit set:
230 *
231 * Assumptions (to make this example simple):
232 * o The fabric ID is 16 bits
233 * o The component ID is 8 bits
234 * o The node ID is 8 bits
235 * o The socket and die ID are both 4 bits
236 *
237 * Here, let's say that we have the ID 0x2106. This decomposes into a
238 * socket 0x2, die 0x1, and component 0x6. Here is how that works in more
239 * detail:
240 *
241 * 0x21 0x06
242 * |------| |------|
243 * Node ID Component ID
244 * Mask: 0xff00 0x00ff
245 * Shift: 8 0
246 *
247 * Next we would decompose the Node ID as:
248 * 0x2 0x1
249 * |------| |------|
250 * Sock ID Die ID
251 * Mask: 0xf0 0x0f
252 * Shift: 4 0
253 *
254 * Composing a fabric ID from its parts would work in a similar way by
255 * applying masks and shifts.
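 *
 *        To make this concrete, here is a hedged C sketch of the above
 *        decomposition. The masks, shifts, and the zen_fabric_parts_t type
 *        are invented to match this made-up example; the real driver reads
 *        the equivalent decomposition data out of data fabric registers:
 *
 *        typedef struct {
 *                uint8_t zfp_sock;
 *                uint8_t zfp_die;
 *                uint8_t zfp_comp;
 *        } zen_fabric_parts_t;
 *
 *        static zen_fabric_parts_t
 *        decompose_fabric_id(uint16_t fabric_id)
 *        {
 *                zen_fabric_parts_t p;
 *                // Per AMD's convention, apply the mask before the shift.
 *                uint8_t node = (fabric_id & 0xff00) >> 8;
 *
 *                p.zfp_comp = fabric_id & 0x00ff;
 *                p.zfp_sock = (node & 0xf0) >> 4;
 *                p.zfp_die = node & 0x0f;
 *                return (p);
 *        }
 *
 *        Feeding 0x2106 through this yields socket 0x2, die 0x1, and
 *        component 0x6, matching the decomposition above.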
256 *
257 * NORMAL ADDRESS
258 *
259 * A normal address is one of the primary address types that AMD uses in
260 * memory decoding. It takes into account the DRAM hole, interleave
261 * settings, and is basically the address that is dispatched to the broader
262 * data fabric towards a particular DRAM channel.
263 *
264 * Often, phrases like 'normalizing the address' or normalization refer to
265 * the process of transforming a system address into the channel address.
266 *
267 * INTERLEAVING
268 *
269 * The idea of interleaving is to take a contiguous range and weave it
270 * between multiple different actual entities. Generally certain bits in
271 * the range are used to select one of several smaller regions. For
272 * example, if you have 8 regions each that are 4 GiB in size, that creates
273 * a single 32 GiB region. You can use three bits in that 32 GiB space to
274 * select one of the 8 regions. For a more visual example, see the
275 * definition of this in uts/intel/io/imc/imc.c.
276 *
277 * CHANNEL
278 *
279 * A channel is used to refer to a single memory channel. This is sometimes
280 * called a DRAM channel as well. A channel operates in a specific mode
281 * based on the JEDEC DRAM standards (e.g. DDR4, LPDDR5, etc.). A
282 * (LP)DDR4/5 channel may support up to two DIMMs inside the channel. The
283 * number of slots is platform dependent and from there the number of DIMMs
 *        installed can vary. Generally speaking, a DRAM channel defines a set
 *        number of signals, most of which go to all DIMMs in the channel; what
 *        varies is which "chip-select" is activated, which causes a given DIMM
 *        to pay attention or not.
288 *
289 * DIMM
290 *
291 * A DIMM refers to a physical hardware component that is installed into a
292 * computer to provide access to dynamic memory. Originally this stood for
293 * dual-inline memory module, though the DIMM itself has evolved beyond
294 * that. A DIMM is organized into various pages, which are addressed by
295 * a combination of rows, columns, banks, bank groups, and ranks. How this
296 * fits together changes from generation to generation and is standardized
297 * in something like DDR4, LPDDR4, DDR5, LPDDR5, etc. These standards
298 * define the general individual modules that are assembled into a DIMM.
299 * There are slightly different standards for combined memory modules
300 * (which is what we use the term DIMM for). Examples of those include
301 * things like registered DIMMs (RDIMMs).
302 *
 *        A DDR4 DIMM contains a single channel that is 64 bits wide with 8
 *        check bits. A DDR5 DIMM has a notable change in this scheme from
 *        earlier DDR standards. It breaks a single DDR5 DIMM into two
 *        sub-channels. Each sub-channel is independently addressed and
 *        contains 32 bits of data and 8 bits of check data.
308 *
309 * ROW AND COLUMN
310 *
311 * The most basic building block of a DIMM is a die. A DIMM consists of
312 * multiple dies that are organized together (we'll discuss the
313 * organization next). A given die is organized into a series of rows and
314 * columns. First, one selects a row. At which point one is able to select
315 * a specific column. It is more expensive to change rows than columns,
316 * leading a given row to contain approximately 1 KiB of data spread across
317 * its columns. The exact size depends on the device. Each row/column is a
318 * series of capacitors and transistors. The transistor is used to select
319 * data from the capacitor and the capacitor actually contains the logical
320 * 0/1 value.
321 *
322 * BANKS AND BANK GROUPS
323 *
324 * An individual DRAM die is organized in something called a bank. A DIMM
325 * has a number of banks that sit in series. These are then grouped into
326 * larger bank groups. Generally speaking, each bank group has the same
327 * number of banks. Let's take a look at an example of a system with 4
328 * bank groups, each with 4 banks.
329 *
330 * +-----------------------+ +-----------------------+
331 * | Bank Group 0 | | Bank Group 1 |
332 * | +--------+ +--------+ | | +--------+ +--------+ |
333 * | | Bank 0 | | Bank 1 | | | | Bank 0 | | Bank 1 | |
334 * | +--------+ +--------+ | | +--------+ +--------+ |
335 * | +--------+ +--------+ | | +--------+ +--------+ |
336 * | | Bank 2 | | Bank 3 | | | | Bank 2 | | Bank 3 | |
337 * | +--------+ +--------+ | | +--------+ +--------+ |
338 * +-----------------------+ +-----------------------+
339 *
340 * +-----------------------+ +-----------------------+
341 * | Bank Group 2 | | Bank Group 3 |
342 * | +--------+ +--------+ | | +--------+ +--------+ |
343 * | | Bank 0 | | Bank 1 | | | | Bank 0 | | Bank 1 | |
344 * | +--------+ +--------+ | | +--------+ +--------+ |
345 * | +--------+ +--------+ | | +--------+ +--------+ |
346 * | | Bank 2 | | Bank 3 | | | | Bank 2 | | Bank 3 | |
347 * | +--------+ +--------+ | | +--------+ +--------+ |
348 * +-----------------------+ +-----------------------+
349 *
350 * On a DIMM, only a single bank and bank group can be active at a time for
351 * reading or writing an 8 byte chunk of data. However, these are still
352 * pretty important and useful because of the time involved to switch
353 * between them. It is much cheaper to switch between bank groups than
354 * between banks and that time can be cheaper than activating a new row.
355 * This allows memory controllers to pipeline this substantially.
356 *
357 * RANK AND CHIP-SELECT
358 *
359 * The next level of organization is a rank. A rank is effectively an
360 * independent copy of all the bank and bank groups on a DIMM. That is,
361 * there are additional copies of the DIMM's organization, but not the data
 *        itself. Originally a single or dual rank DIMM was built such that
 *        one copy of everything was on each physical side of the DIMM. As the
 *        number of ranks has increased
365 * this has changed as well. Generally speaking, the contents of the rank
366 * are equivalent. That is, you have the same number of bank groups, banks,
367 * and each bank has the same number of rows and columns.
368 *
369 * Ranks are selected by what's called a chip-select, often abbreviated as
370 * CS_L in the various DRAM standards. AMD also often abbreviates this as a
371 * CS (which is not to be confused with the DF class of device called a
372 * CS). These signals are used to select a rank to activate on a DIMM.
373 * There are some number of these for each DIMM which is how the memory
374 * controller chooses which of the DIMMs it's actually going to activate in
375 * the system.
376 *
377 * One interesting gotcha here is how AMD organizes things. Each DIMM
378 * logically is broken into two chip-selects in hardware. Between DIMMs
379 * with more than 2 ranks and 3D stacked RDIMMs, there are ways to
380 * potentially activate more bits. Ultimately these are mapped to a series
381 * of rank multiplication logic internally. These ultimately then control
382 * some of these extra pins, though the exact method isn't 100% clear at
383 * this time.
384 *
385 * -----------------------
386 * Rough Hardware Process
387 * -----------------------
388 *
389 * To better understand how everything is implemented and structured, it's worth
390 * briefly describing what happens when hardware wants to read a given physical
 * address. This is roughly summarized in the following chart. On the left-hand
 * side is the type of address, which is transformed and generally shrinks along
393 * the way. Next to it is the actor that is taking action and the type of
394 * address that it starts with.
395 *
396 * +---------+ +------+
397 * | Virtual | | CPU |
398 * | Address | | Core |
399 * +---------+ +------+
400 * | | The CPU core receives a memory request and then
401 * | * . . . . determines whether this request is DRAM or MMIO
402 * | | (memory-mapped I/O) and then sends it to the data
403 * v v fabric.
404 * +----------+ +--------+
405 * | Physical | | Data |
406 * | Address | | Fabric |
407 * +----------+ +--------+
408 * | | The data fabric instance in the CCX/D uses the
409 * | * . . . . programmed DRAM rules to determine what DRAM
410 * | | channel to direct a request to and what the
411 * | | channel-relative address is. It then sends the
412 * | | request through the fabric. Note, the number of
413 * | | DRAM rules varies based on the processor SoC.
414 * | | Server parts like Milan have many more rules than
415 * | | an APU like Cezanne. The DRAM rules tell us both
416 * v v how to find and normalize the physical address.
417 * +---------+ +---------+
418 * | Channel | | DRAM |
419 * | Address | | Channel |
420 * +---------+ +---------+
421 * | | The UMC (unified memory controller) receives the
422 * | * . . . . DRAM request and determines which DIMM to send
423 * | | the request to along with the rank, banks, row,
424 * | | column, etc. It initiates a DRAM transaction and
425 * | | then sends the results back through the data
426 * v v fabric to the CPU core.
427 * +---------+ +--------+
428 * | DIMM | | Target |
429 * | Address | | DIMM |
430 * +---------+ +--------+
431 *
432 * The above is all generally done in hardware. There are multiple steps
433 * internal to this that we end up mimicking in software. This includes things
 * like applying hashing logic, address transformations, and related steps.
435 * Thankfully the hardware is fairly generic and programmed with enough
436 * information that we can pull out to figure this out. The rest of this theory
437 * statement covers the major parts of this: interleaving, the act of
438 * determining which memory channel to actually go to, and normalization, the
439 * act of removing some portion of the physical address bits to determine the
440 * address relative to a channel.
441 *
442 * ------------------------
443 * Data Fabric Interleaving
444 * ------------------------
445 *
446 * One of the major parts of address decoding is to understand how the
447 * interleaving features work in the data fabric. This is used to allow an
448 * address range to be spread out between multiple memory channels and then,
449 * later on, when normalizing the address. As mentioned above, a system address
450 * matches a rule which has information on interleaving. Interleaving comes in
451 * many different flavors. It can be used to just switch between channels,
452 * sockets, and dies. It can also end up involving some straightforward and some
453 * fairly complex hashing operations.
454 *
455 * Each DRAM rule has instructions on how to perform this interleaving. The way
456 * this works is that the rule first says to start at a given address bit,
 * generally ranging from bits 8-12. This influences the granularity of the
458 * interleaving going on. From there, the rules determine how many bits to use
459 * from the address to determine the die, socket, and channel. In the simplest
460 * form, these perform a log2 of the actual number of things you're interleaving
461 * across (we'll come back to non-powers of two). So let's work a few common
462 * examples:
463 *
464 * o 8-channel interleave, 1-die interleave, 2-socket interleave
465 * Start at bit 9
466 *
467 * In this case we have 3 bits that determine the channel to use, 0 bits
468 * for the die, 1 bit for the socket. Here we would then use the following
469 * bits to determine what the channel, die, and socket IDs are:
470 *
471 * [12] - Socket ID
472 * [11:9] - Channel ID
473 *
474 * You'll note that there was no die-interleave, which means the die ID is
 *     always zero. This is the general thing you expect to see in Zen 2 and 3
 *     based systems, as they only have one die per socket, or in a Zen 1 APU.
477 *
478 * o 2-channel interleave, 4-die interleave, 2-socket interleave
479 * Start at bit 10
480 *
481 * In this case we have 1 bit for the channel and socket interleave. We
482 * have 2 bits for the die. This is something you might see on a Zen 1
483 * system. This results in the following bits:
484 *
485 * [13] - Socket ID
486 * [12:11] - Die ID
487 * [10] - Channel ID
488 *
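 * Before moving on to the hashed variants, here is a hedged C sketch of this
 * simple extraction (the function and parameter names are invented; the real
 * driver drives this from the decoded DRAM rule):
 *
 *        static void
 *        extract_simple_ileave(uint64_t addr, uint_t start_bit,
 *            uint_t nchan_bits, uint_t ndie_bits, uint_t nsock_bits,
 *            uint32_t *chanp, uint32_t *diep, uint32_t *sockp)
 *        {
 *                uint64_t val = addr >> start_bit;
 *
 *                // The channel bits are lowest, then the die, then the
 *                // socket.
 *                *chanp = val & ((1U << nchan_bits) - 1);
 *                val >>= nchan_bits;
 *                *diep = val & ((1U << ndie_bits) - 1);
 *                val >>= ndie_bits;
 *                *sockp = val & ((1U << nsock_bits) - 1);
 *        }
 *
 * For the first example, extract_simple_ileave(addr, 9, 3, 0, 1, ...) pulls
 * the channel from addr[11:9] and the socket from addr[12], with the die ID
 * fixed at zero.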
489 *
490 * COD, NPS, and MI3H HASHING
491 *
 * However, this isn't the only way of extracting the above values. The
 * other primary method is using a hash. While the exact hash methods vary
494 * between Zen 2/3 and Zen 4 based systems, they follow a general scheme. In the
495 * system there are three interleaving configurations that are either global or
496 * enabled on a per-rule basis. These indicate whether one should perform the
497 * XOR computation using addresses at:
498 *
499 * o 64 KiB (starting at bit 16)
500 * o 2 MiB (starting at bit 21)
501 * o 1 GiB (starting at bit 30)
502 *
503 * In this world, you take the starting address bit defined by the rule and XOR
504 * it with each enabled interleave address. If you have more than one bit to
505 * select (e.g. because you are hashing across more than 2 channels), then you
506 * continue taking subsequent bits from each enabled region. So the second bit
507 * would use 17, 21, and 31 if all three ranges were enabled while the third bit
508 * would use 18, 22, and 32. While these are straightforward, there is a catch.
509 *
 * While the DRAM rule contains the starting address bit, you don't
 * actually use subsequent bits in the same way. Instead, subsequent bits are
 * deterministic and use bits 12 and 13 from the address. This is not the
 * consecutive scheme that one might expect. Let's look at a Rome/Milan based
514 * example:
515 *
516 * o 8-channel "COD" hashing, starting at address 9. All three ranges enabled.
517 * 1-die and 1-socket interleaving.
518 *
519 * In this model we are using 3 bits for the channel, 0 bits for the socket
520 * and die.
521 *
522 * Channel ID[0] = addr[9] ^ addr[16] ^ addr[21] ^ addr[30]
523 * Channel ID[1] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31]
524 * Channel ID[2] = addr[13] ^ addr[18] ^ addr[23] ^ addr[32]
525 *
526 * So through this scheme we'd have a socket/die of 0, and then the channel
527 * ID is computed based on that. The number of bits that we use here
528 * depends on how many channels the hash is going across.
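 *
 * As a hedged illustration (cod_hash_channel is an invented name), the three
 * channel bits above could be computed as:
 *
 *        static uint32_t
 *        cod_hash_channel(uint64_t addr)
 *        {
 *                uint32_t chan = 0;
 *
 *                // Result bit 0 comes from the rule's starting bit, 9 here.
 *                chan |= ((addr >> 9) ^ (addr >> 16) ^ (addr >> 21) ^
 *                    (addr >> 30)) & 1;
 *                // Subsequent result bits always come from bits 12 and 13.
 *                chan |= (((addr >> 12) ^ (addr >> 17) ^ (addr >> 22) ^
 *                    (addr >> 31)) & 1) << 1;
 *                chan |= (((addr >> 13) ^ (addr >> 18) ^ (addr >> 23) ^
 *                    (addr >> 32)) & 1) << 2;
 *                return (chan);
 *        }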
529 *
 * The Genoa and related variants, termed "NPS", have a few wrinkles. First,
531 * rather than 3 bits being used for the channel, up to 4 bits are. Second,
532 * while the Rome/Milan "COD" hash above does not support socket or die
533 * interleaving, the "NPS" hash actually supports socket interleaving. However,
534 * unlike the straightforward non-hashing scheme, the first bit is used to
535 * determine the socket when enabled as opposed to the last one. In addition, if
536 * we're not performing socket interleaving, then we end up throwing address bit
537 * 14 into the mix here. Let's look at examples:
538 *
539 * o 4-channel "NPS" hashing, starting at address 8. All three ranges enabled.
540 * 1-die and 1-socket interleaving.
541 *
542 * In this model we are using 2 bits for the channel, 0 bits for the socket
543 * and die. Because socket interleaving is not being used, bit 14 ends up
544 * being added into the first bit of the channel selection. Presumably this
545 * is to improve the address distribution in some form.
546 *
547 * Channel ID[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30] ^ addr[14]
548 * Channel ID[1] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31]
549 *
550 * o 8-channel "NPS" hashing, starting at address 9. All three ranges enabled.
551 * 1-die and 2-socket interleaving.
552 *
553 * In this model we are using 3 bits for the channel and 1 for the socket.
554 * The die is always set to 0. Unlike the above, address bit 14 is not used
555 * because it ends up being required for the 4th address bit.
556 *
557 * Socket ID[0] = addr[9] ^ addr[16] ^ addr[21] ^ addr[30]
558 * Channel ID[0] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31]
559 * Channel ID[1] = addr[13] ^ addr[18] ^ addr[23] ^ addr[32]
560 * Channel ID[2] = addr[14] ^ addr[19] ^ addr[24] ^ addr[33]
561 *
562 * DF 4D2 NPS 1K/2K
563 *
564 * In our DF 4D2 variant, the interleave controls were changed and the way that
565 * hashes work is different. There are two main families here, a variant on the
566 * prior NPS hashing that is either NPS 1K or NPS 2K and the MI300 variant that
567 * we call MI3H. First, there are two additional address ranges that have been
568 * added:
569 *
570 * o 4 KiB (starting at bit 12)
571 * o 1 TiB (starting at bit 40)
572 *
573 * Of these, our understanding is that the 4 KiB range is only used for MI3H
574 * based hashing. When it is used, only bits 12-14 will be used, but that's
 * because the hash algorithm for the MI3H series is, well, unique. The 1 TiB
 * range otherwise works somewhat as normal. Currently we don't support the
 * MI3H decoding, but the code knows that it exists so we can provide a better
 * error code.
579 *
580 * The NPS 1K/2K hashes use a similar style. These are designed to support up to
581 * 32 channel hashes, which causes up to 5 bits to be used. The 5 bit form is
582 * only supported in the 1K variant. It starts at bit 8 (the nominally required
583 * starting interleave address) and then uses bit 9, before jumping up to bits
584 * 12-14 as required. The XOR addresses count up in a similar fashion. So the 64
585 * KiB interleave would use up to bits 16-20 in this scheme (corresponding to
586 * result bits 0-4).
587 *
 * When the 2K form is used, only 4 bits are supported and bit 9 is skipped
 * entirely. This looks very similar to the NPS form; however, the gap is also
 * there in the XOR bits and there is no longer the question of using bit 14 or
 * not with socket interleaving: it is only ever used if we need the 5th
 * channel bit. To see the difference, let's look at two examples where the
 * only difference between the two is whether we are using 1K or 2K hashing.
594 *
595 * o 8-channel "NPS" 1K hashing, starting at address 8. 64 KiB, 2 MiB, 1 GiB,
596 * and 1 TiB are enabled. 1-die and 1-socket.
597 *
598 * In this model, there are always 3 bits for the channel. This means that
 *     we will only use bits 8, 9, and 12 from the address to start with.
600 *
601 * Channel ID[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30]
602 * Channel ID[1] = addr[9] ^ addr[17] ^ addr[22] ^ addr[31]
603 * Channel ID[2] = addr[12] ^ addr[18] ^ addr[23] ^ addr[32]
604 *
605 * o 8-channel "NPS" 2K hashing, starting at address 8. 64 KiB, 2 MiB, 1 GiB,
606 * and 1 TiB are enabled. 1-die and 1-socket.
607 *
608 * In this model, we also use 3 bits for the channel. However, we no longer
609 * use bit 9, which is the 1K mode only. Similarly, you'll see that the bits
610 * from the hash that would have been used for determining interleaving with
611 * bit 9 are skipped entirely. This is why the 1K/2K variants are
612 * incompatible with the original NPS hashing.
613 *
614 * Channel ID[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30]
615 * Channel ID[1] = addr[12] ^ addr[18] ^ addr[23] ^ addr[32]
616 * Channel ID[2] = addr[13] ^ addr[19] ^ addr[24] ^ addr[33]
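 *
 * A hedged C sketch of the two 8-channel examples, following the equations
 * exactly as written above (the function name, flag, and lookup tables are
 * invented for illustration):
 *
 *        static uint32_t
 *        nps_k_8ch_channel(uint64_t addr, boolean_t is_1k)
 *        {
 *                // Address bit and XOR-range index for each result bit. The
 *                // 2K form skips bit 9 and its XOR index entirely.
 *                static const uint_t abit_1k[3] = { 8, 9, 12 };
 *                static const uint_t xidx_1k[3] = { 0, 1, 2 };
 *                static const uint_t abit_2k[3] = { 8, 12, 13 };
 *                static const uint_t xidx_2k[3] = { 0, 2, 3 };
 *                const uint_t *ab = is_1k ? abit_1k : abit_2k;
 *                const uint_t *xi = is_1k ? xidx_1k : xidx_2k;
 *                uint32_t chan = 0;
 *
 *                for (uint_t i = 0; i < 3; i++) {
 *                        uint64_t bit = (addr >> ab[i]) ^
 *                            (addr >> (16 + xi[i])) ^
 *                            (addr >> (21 + xi[i])) ^
 *                            (addr >> (30 + xi[i]));
 *                        chan |= (bit & 1) << i;
 *                }
 *                return (chan);
 *        }
 *
 * The skipped XOR index in the 2K tables is the gap described above and is
 * why the 1K and 2K variants are incompatible with the original NPS hash.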
617 *
618 * ZEN 3 6-CHANNEL
619 *
620 * These were the simple cases. Things get more complex when we move to
621 * non-power of 2 based hashes between channels. There are two different sets of
622 * these schemes. The first of these is 6-channel hashing that was added in Zen
623 * 3. The second of these is a more complex and general form that was added in
624 * Zen 4. Let's start with the Zen 3 case. The Zen 3 6-channel hash requires
625 * starting at address bits 11 or 12 and varies its logic somewhat from there.
626 * In the 6-channel world, the socket and die interleaving must be disabled.
627 * Let's walk through an example:
628 *
629 * o 6-channel Zen 3, starting at address 11. 2M and 1G range enabled.
630 * 1-die and 1-socket interleaving.
631 *
632 * Regardless of the starting address, we will always use three bits to
633 * determine a channel address. However, it's worth calling out that the
634 * 64K range is not considered for this at all. Another oddity is that when
635 * calculating the hash bits the order of the extracted 2M and 1G addresses
 *     is different.
637 *
638 * This flow starts by calculating the three hash bits. This is defined
639 * below. In the following, all bits marked with an '@' are ones that will
640 * change when starting at address bit 12. In those cases the value will
641 * increase by 1. Here's how we calculate the hash bits:
642 *
643 * hash[0] = addr[11@] ^ addr[14@] ^ addr[23] ^ addr[32]
644 * hash[1] = addr[12@] ^ addr[21] ^ addr[30]
645 * hash[2] = addr[13@] ^ addr[22] ^ addr[31]
646 *
647 * With this calculated, we always assign the first bit of the channel
648 * based on the hash. The other bits are more complicated as we have to
649 * deal with that gnarly power of two problem. We determine whether or not
650 * to use the hash bits directly in the channel based on their value. If
 *     they are not equal to 3, then we use them; otherwise, we need to go
 *     back to the physical address and take its modulus.
653 * Basically:
654 *
 *     Channel ID[0] = hash[0]
656 * if (hash[2:1] == 3)
657 * Channel ID[2:1] = (addr >> [11@+3]) % 3
658 * else
659 * Channel ID[2:1] = hash[2:1]
660 *
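 * A hedged C rendition of this flow for the start-at-bit-11 case above (the
 * function name is invented and the '@' adjustments for starting at bit 12
 * are omitted):
 *
 *        static uint32_t
 *        zen3_6ch_channel(uint64_t addr)
 *        {
 *                uint32_t hash0, hash21, chan;
 *
 *                hash0 = ((addr >> 11) ^ (addr >> 14) ^ (addr >> 23) ^
 *                    (addr >> 32)) & 1;
 *                hash21 = (((addr >> 12) ^ (addr >> 21) ^ (addr >> 30)) & 1) |
 *                    ((((addr >> 13) ^ (addr >> 22) ^ (addr >> 31)) & 1) << 1);
 *
 *                chan = hash0;
 *                if (hash21 == 3) {
 *                        // Fall back to a mod 3 of the remaining address.
 *                        chan |= ((addr >> (11 + 3)) % 3) << 1;
 *                } else {
 *                        chan |= hash21 << 1;
 *                }
 *                return (chan);
 *        }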
661 *
662 * ZEN 4 NON-POWER OF 2
663 *
664 * I hope you like modulus calculations, because things get even more complex
665 * here now in Zen 4 which has many more modulus variations. These function in a
666 * similar way to the older 6-channel hash in Milan. They require one to start
667 * at address bit 8, they require that there is no die interleaving, and they
668 * support socket interleaving. The different channel arrangements end up in one
669 * of two sets of modulus values: a mod % 3 and a mod % 5 based on the number
670 * of channels used. Unlike the Milan form, all three address ranges (64 KiB, 2
671 * MiB, 1 GiB) are allowed to be used.
672 *
673 * o 6-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled.
674 * 1-die and 2-socket interleaving.
675 *
676 * We start by calculating the following set of hash bits regardless of
677 * the number of channels that exist. The set of hash bits that is actually
678 * used in various computations ends up varying based upon the number of
 *     channels used. In 3-5 channel configs, only hash[0] is used. In 6-10
 *     channel configs, both hash[0] and hash[2] are used (yes, not hash[1]).
 *     The 12 channel config uses all three.
681 *
682 * hash[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30] ^ addr[14]
683 * hash[1] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31]
684 * hash[2] = addr[13] ^ addr[18] ^ addr[23] ^ addr[32]
685 *
686 * Unlike other schemes where bits directly map here, they instead are used
687 * to seed the overall value. Depending on whether hash[0] is a 0 or 1, the
 *     system goes through two different calculations entirely, though all
 *     of them end up involving the remainder of the system address going
 *     through
690 * the modulus. In the following, a '3@' indicates the modulus value would
691 * be swapped to 5 in a different scenario.
692 *
693 * Channel ID = addr[63:14] % 3@
694 * if (hash[0] == 1)
695 * Channel ID = (Channel ID + 1) % 3@
696 *
 *     Once this base for the channel ID has been calculated, additional
698 * portions are added in. As this is the 6-channel form, we say:
699 *
700 * Channel ID = Channel ID + (hash[2] * 3@)
701 *
702 * Finally the socket is deterministic and always comes from hash[0].
703 * Basically:
704 *
705 * Socket ID = hash[0]
706 *
707 * o 12-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled.
708 * 1-die and 1-socket interleaving.
709 *
710 * This is a variant of the above. The hash is calculated the same way.
711 * The base Channel ID is the same and if socket interleaving were enabled
712 * it would also be hash[0]. What instead differs is how we use hash[1]
713 * and hash[2]. The following logic is used instead of the final
714 * calculation above.
715 *
716 * Channel ID = Channel ID + (hash[2:1] * 3@)
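 *
 * A hedged C sketch of this 12-channel, modulus 3, single-socket case (the
 * function name is invented):
 *
 *        static uint32_t
 *        zen4_12ch_channel(uint64_t addr)
 *        {
 *                uint32_t hash0, hash21, chan;
 *
 *                hash0 = ((addr >> 8) ^ (addr >> 16) ^ (addr >> 21) ^
 *                    (addr >> 30) ^ (addr >> 14)) & 1;
 *                hash21 = (((addr >> 12) ^ (addr >> 17) ^ (addr >> 22) ^
 *                    (addr >> 31)) & 1) |
 *                    ((((addr >> 13) ^ (addr >> 18) ^ (addr >> 23) ^
 *                    (addr >> 32)) & 1) << 1);
 *
 *                chan = (addr >> 14) % 3;        // addr[63:14] % 3
 *                if (hash0 == 1)
 *                        chan = (chan + 1) % 3;
 *                return (chan + hash21 * 3);
 *        }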
717 *
718 * NPS 1K/2K NON-POWER of 2
719 *
720 * Just as the normal hashing changed with the introduction of the 1K/2K
721 * variants, so does the non-power of 2 hashing. This NP2 scheme is rather
722 * different than the base Zen 4 one. This uses the 64 KiB, 2 MiB, 1 GiB, and 1
723 * TiB ranges for hashing. Logically there are both 3 and 5 channel hashes again
724 * like Zen 4 and when socket interleaving is enabled, address bit 8 is always
725 * going to the socket.
726 *
727 * The 1K and 2K modes change which addresses are used and considered just like
728 * the non-NP2 case. The same interleave bit skipping for 2K still applies,
729 * meaning bit 9 will not be used for hashing and will instead be part of the
730 * normal address calculations that we have.
731 *
732 * Like in the Zen 4 case, we are going to be constructing our normalized
733 * address from three regions of bits. The low region which is everything that
734 * is used before the hashing, the bits skipped in the middle, and then the
 * upper bits that have been untouched. These are not rearranged; rather, it's
 * best to think of it as bits being removed, causing the remaining address to
 * shift and shrink.
738 *
739 * Another important difference to call out before we get to examples is that
740 * each variant here uses a different address range as the upper portion to use.
 * Unfortunately, whereas for Zen 4 we had some regular rules, each of these
742 * cases seems rather different. However, there is some general logic which is
743 * that in each case we calculate some modulus value from different addresses
744 * which we use to determine the channel, sometimes mixed with other hash bits.
745 * Then we calculate a new normalized address by taking the divisor as the high
746 * portion. Let's look at some examples here:
747 *
748 * o 12 Channel 1K Zen 5, starting at address 8. 64K, 2M, 1G, and 1T ranges
749 * enabled. 1-die and 1-socket interleaving.
750 *
751 * This 12 channel mode is a modulus 3 case. This particular case needs two
752 * hash bits. Because it is a 1K mode it uses bits 8 and 9. If we were in a
753 * 2K mode, we'd use bits 8 and 12. Bit 8 always also hashes in bit 14 just
754 * like the Zen 4 case.
755 *
756 * hash[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30] ^ addr[40] ^
757 * addr[14]
758 * hash[1] = addr[9] ^ addr[17] ^ addr[22] ^ addr[31] ^ addr[41]
759 *
760 * Now that we have that, it's time to calculate the address we need to
761 * take the modulus of to stick into the channel. For this particular case,
762 * we construct an address as PA >> 12 | 0b00. In other words we take bits
763 * [48+, 12] and move them to bit 2. Once we have that, we can go ahead and
764 * construct the value modulus 3. Symbolically:
765 *
766 * modAddr = (addr[64:12] & ~3) | 0b00 (or (addr >> 12) << 2)
767 * modVal = modAddr % 3
768 *
769 * Channel ID[0] = hash[0]
770 * Channel ID[1] = hash[1]
 *     Channel ID[2] = modVal[0]
 *     Channel ID[3] = modVal[1]
773 *
774 * In the 2K version we use (addr[64:13] & ~7) | 0b000 and hash[1] is based
775 * on addr[12] rather than addr[9].
776 *
777 * o 5 Channel 2K Zen 5, starting at address 8. 64K, 2M, 1G, and 1T ranges
778 * enabled. 1-die and 1-socket interleaving.
779 *
 *     With the 5-channel based mode we will now be working modulus five
 *     rather than three. In this case, we have similar logic, except the way
 *     the
782 * address is constructed to take the mod of is different. We can think of
783 * this as:
784 *
785 * modAddr = addr[64:12] | addr[8] | 0b0
786 * modVal = modAddr % 5
787 *
788 * Channel ID[0] = modVal[0]
789 * Channel ID[1] = modVal[1]
790 * Channel ID[2] = modVal[2]
791 *
792 * Basically this ends up using a rather similar logical construction;
 *     however, the values that it plugs in are different. Note that there
 *     was no use of the hash in this case.
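 *
 * Under one reading of the modAddr construction above (hedged, as the
 * notation is terse; the variable names are illustrative), the C equivalent
 * would be:
 *
 *        uint64_t mod_addr, mod_val;
 *
 *        // Concatenate addr[63:12], then addr[8], then a zero bit.
 *        mod_addr = ((addr >> 12) << 2) | (((addr >> 8) & 1) << 1);
 *        mod_val = mod_addr % 5;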
795 *
796 * POST BIT EXTRACTION
797 *
 * Now, all of this was done to concoct a series of indexes. However,
799 * you'll note that a given DRAM rule actually already has a fabric target. So
800 * what do we do here? We add them together.
801 *
802 * The data fabric has registers that describe which bits in a fabric ID
803 * correspond to a socket, die, and channel. Taking the channel, die, and socket
804 * IDs above, one can construct a fabric ID. From there, we add the two data
805 * fabric IDs together and can then get to the fabric ID of the actual logical
 * target. This is why it is OK for the socket and die IDs in the examples
 * with no interleaving to come out as zero. The idea here is that the base
808 * fabric ID in the DRAM rule will take care of indicating those other things as
809 * required.
810 *
811 * You'll note the use of the term "logical target" up above. That's because
812 * some platforms have the ability to remap logical targets to physical targets
813 * (identified by the use of the ZEN_UMC_FAM_F_TARG_REMAP flag in the family
814 * data or the DF::DfCapability register once we're at the DF 4D2 variant). The
815 * way that remapping works changes based on the hardware generation. This was
816 * first added in Milan (Zen 3) CPUs. In that model, you would use the socket
817 * and component information from the target ID to identify which remapping
818 * rules to use. On Genoa (Zen 4) CPUs, you would instead use information in the
819 * rule itself to determine which of the remap rule sets to use and then uses
 * rule itself to determine which of the remap rule sets to use and then use
 * the component ID to select which rewrite rule to use.
822 * Finally, there's one small wrinkle with this whole scheme that we haven't
 * discussed: what actually is the address that we plug into this calculation?
 * While you might think it is just the system address itself, that isn't
 * always the case. Sometimes rather than using the address
826 * itself, it gets normalized based on the DRAM rule, which involves subtracting
827 * out the base address and potentially subtracting out the size of the DRAM
828 * hole (if the address is above the hole and hoisting is active for that
 * range). Whether this is performed appears to be tied to the DF generation.
 * The following table relates the DF generation to our behavior:
831 *
832 * o DF 2 (Zen 1): Use the raw address
833 * o DF 3 (Zen 2-3): Use the raw address if it's not a power of 2
834 * o DF 3.5: Use the adjusted address
835 * o DF 4 (Zen 4): Use the adjusted address
836 * o DF 4D2 (Zen 4/5): Use the raw address
837 *
838 * --------------------------------------------
839 * Data Fabric Interleave Address Normalization
840 * --------------------------------------------
841 *
842 * While you may have thought that we were actually done with the normalization
843 * fun in the last section, there's still a bit more here that we need to
844 * consider. In particular, there's a secondary transformation beyond
845 * interleaving that occurs as part of constructing the channel normalized
846 * address. Effectively, we need to account for all the bits that were used in
847 * the interleaving and generally speaking remove them from our normalized
848 * address.
849 *
850 * While this may sound weird on paper, the way to think about it is that
851 * interleaving at some granularity means that each device is grabbing the same
852 * set of addresses, the interleave just is used to direct it to its own
853 * location. When working with a channel normalized address, we're effectively
854 * creating a new region of addresses that have meaning within the DIMMs
855 * themselves. The channel doesn't care about what got it there, mainly just
856 * what it is now. So with that in mind, we need to discuss how we remove all
857 * the interleaving information in our different modes.
858 *
859 * Just to make sure it's clear, we are _removing_ all bits that were used for
860 * interleaving. This causes all bits above the removed ones to be shifted
861 * right.
862 *
863 * First, we have the case of standard power of 2 interleaving that applies to
864 * the 1, 2, 4, 8, 16, and 32 channel configurations. Here, we need to account
865 * for the total number of bits that are used for the channel, die, and socket
866 * interleaving and we simply remove all those bits starting from the starting
867 * address.
868 *
869 * o 8-channel interleave, 1-die interleave, 2-socket interleave
870 * Start at bit 9
871 *
872 * If we look at this example, we are using 3 bits for the channel, 1 for
873 * the socket, for a total of 4 bits. Because this is starting at bit 9,
874 * this means that interleaving covers the bit range [12:9]. In this case
875 * our new address would be (orig[63:13] >> 4) | orig[8:0].
876 *
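 * As a hedged C sketch of this contraction (norm_remove_bits is an invented
 * name):
 *
 *        static uint64_t
 *        norm_remove_bits(uint64_t addr, uint_t start, uint_t nbits)
 *        {
 *                // Keep the low bits, then shift everything above the
 *                // removed interleave bits down on top of them.
 *                uint64_t low = addr & ((1ULL << start) - 1);
 *                uint64_t high = addr >> (start + nbits);
 *
 *                return ((high << start) | low);
 *        }
 *
 * The example above is then simply norm_remove_bits(orig, 9, 4).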
877 *
878 * COD and NPS HASHING
879 *
880 * That was the simple case, next we have the COD/NPS hashing case that we need
881 * to consider. If we look at these, the way that they work is that they split
882 * which bits they use for determining the channel address and then hash others
883 * in. Here, we need to extract the starting address bit, then continue at bit
884 * 12 based on the number of bits in use and whether or not socket interleaving
885 * is at play for the NPS variant. Let's look at an example here:
886 *
887 * o 8-channel "COD" hashing, starting at address 9. All three ranges enabled.
888 * 1-die and 1-socket interleaving.
889 *
890 * Here we have three total bits being used. Because we start at bit 9, this
891 * means we need to drop bits [13:12], [9]. So our new address would be:
892 *
893 * orig[63:14] >> 3 | orig[11:10] >> 1 | orig[8:0]
894 * | | +-> stays the same
895 * | +-> relocated to bit 9 -- shifted by 1 because we
896 * | removed bit 9.
897 * +--> Relocated to bit 11 -- shifted by 3 because we removed bits, 9, 12,
898 * and 13.
899 *
900 * o 8-channel "NPS" hashing, starting at address 8. All three ranges enabled.
901 * 1-die and 2-socket interleaving.
902 *
903 * Here we need to remove bits [14:12], [8]. We're removing an extra bit
904 * because we have 2-socket interleaving. This results in a new address of:
905 *
906 * orig[63:15] >> 4 | orig[11:9] >> 1 | orig[7:0]
907 * | | +-> stays the same
908 * | +-> relocated to bit 8 -- shifted by 1 because we
909 * | removed bit 8.
910 * +--> Relocated to bit 11 -- shifted by 4 because we removed bits, 8, 12,
911 * 13, and 14.
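 *
 * A hedged C sketch of the first (COD) example, dropping bit 9 and bits
 * [13:12] (the function name is invented):
 *
 *        static uint64_t
 *        norm_cod_8ch(uint64_t addr)
 *        {
 *                uint64_t low = addr & 0x1ff;            // orig[8:0]
 *                uint64_t mid = (addr >> 10) & 0x3;      // orig[11:10]
 *                uint64_t high = addr >> 14;             // orig[63:14]
 *
 *                // Reassemble: orig[63:14] lands at bit 11, orig[11:10]
 *                // at bit 9.
 *                return ((high << 11) | (mid << 9) | low);
 *        }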
912 *
913 * NPS 1K/2K Hashing
914 *
915 * This case is a fairly straightforward variant on what we just discussed. In
916 * fact, 2K hashing looks just like what we've done before. The only difference
917 * with 1K hashing is that we'll consider bit 9 also for removal before we jump
918 * up to bit 12. Let's look at an example:
919 *
920 * o 8-channel "NPS" 1K hashing, starting at address 8. All three ranges
921 * enabled. 1-die and 2-socket interleaving.
922 *
923 * Here we need to remove a total of 4 bits, which is now broken into
924 * [13:12] and [9:8]. This results in a new address of:
925 *
926 * orig[63:14] >> 4 | orig[11:10] >> 2 | orig[7:0]
927 * | | +-> stays the same
928 * | +-> relocated to bit 8 -- shifted by 2 because we
929 * | removed bits 8 and 9.
930 * +--> Relocated to bit 11 -- shifted by 4 because we removed bits, 8, 9,
931 * 12, and 13.
932 *
933 * ZEN 3 6-CHANNEL
934 *
935 * Now, to the real fun stuff, our non-powers of two. First, let's start with
936 * our friend, the Zen 3 6-channel hash. So, the first thing that we need to do
937 * here is start by recomputing our hash again based on the current normalized
938 * address. Regardless of the hash value, this first removes all three bits from
939 * the starting address, so that's removing either [14:12] or [13:11].
940 *
 * The rest of the normalization process here is quite complex and somewhat
 * mind-bending. Let's start working through an example here and build this up.
 * First, let's assume that each channel has a single 16 GiB RDIMM. This would
 * mean that the channel itself has 96 GiB of RDIMM. However, by removing 3
 * bits worth, that technically corresponds to an 8-channel configuration that
 * would normally suggest a 128 GiB configuration. The processor requires us to
 * record this fact in the DF::Np2ChannelConfig register. The value that it
 * wants is a bit weird. We believe it's calculated by the following:
949 *
950 * 1. Round the channel size up to the next power of 2.
951 * 2. Divide this total size by 64 KiB.
952 * 3. Determine the log base 2 that satisfies this value.
953 *
 * In our particular example above, we have a 96 GiB channel, so for (1) we end
 * up with 128 GiB (2^37). We now divide that by 64 KiB (2^16), so this becomes
 * 2^(37 - 16) or 2^21. Because we want the log base 2 of that 2^21 from (2),
 * this simply becomes 21. The DF::Np2ChannelConfig register has two members, a
 * 'space 0' and
958 * 'space 1'. Near as we can tell, in this mode only 'space 0' is used.
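 *
 * A hedged C sketch of that three-step calculation (np2_space0 is an invented
 * name):
 *
 *        static uint_t
 *        np2_space0(uint64_t chan_size)
 *        {
 *                uint64_t sz = 1;
 *                uint_t log2 = 0;
 *
 *                // (1) Round the channel size up to the next power of 2.
 *                while (sz < chan_size) {
 *                        sz <<= 1;
 *                        log2++;
 *                }
 *                // (2) and (3): divide by 64 KiB (2^16) and take the log2.
 *                return (log2 - 16);
 *        }
 *
 * For the 96 GiB example, sz rounds up to 2^37 and this returns 21.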
959 *
 * Before we get into the actual normalization scheme, we have to ask ourselves
 * how we actually interleave data 6 ways. The scheme here is involved. First,
 * it's important to remember, as with other normalization schemes, that we
 * adjust the address for the base address in the DRAM rule and then also take
 * into account the DRAM hole if present.
965 *
966 * If we delete 3 bits, let's take a sample address and see where it would end
967 * up in the above scheme. We're going to take our 3 address bits and say that
968 * they start at bit 12, so this means that the bits removed are [14:12]. So the
 * following are the 8 addresses that we have here and where they end up,
 * starting with 0x1ff:
971 *
972 * o 0x01ff -> 0x1ff, Channel 0 (hash 0b000)
973 * o 0x11ff -> 0x1ff, Channel 1 (hash 0b001)
974 * o 0x21ff -> 0x1ff, Channel 2 (hash 0b010)
975 * o 0x31ff -> 0x1ff, Channel 3 (hash 0b011)
976 * o 0x41ff -> 0x1ff, Channel 4 (hash 0b100)
977 * o 0x51ff -> 0x1ff, Channel 5 (hash 0b101)
978 * o 0x61ff -> 0x3000001ff, Channel 0 (hash 0b110)
979 * o 0x71ff -> 0x3000001ff, Channel 1 (hash 0b111)
980 *
981 * Yes, we did just jump to near the top of what is a 16 GiB DIMM's range for
982 * those last two. The way we determine when to do this jump is based on our
983 * hash. Effectively we ask what is hash[2:1]. If it is 0b11, then we need to
984 * do something different and enter this special case, basically jumping to the
985 * top of the range. If we think about a 6-channel configuration for a moment,
 * the things that don't exist are the traditional 8-channel hash DIMMs 0b110
 * and 0b111.
988 *
 * If you go back to the interleaving logic, this kind of meshes: it handled
 * the case of the hash being 0, 1, and 2 normally, and then did special things
 * with the case of the hash being in this upper quadrant. The hash then
 * determined where an address went by shifting over the upper address, doing a
 * mod 3, and using that to determine the upper two bits. With that weird
 * address at the top of the range, let's go through and see what else actually
 * goes to those weird addresses:
996 *
997 * o 0x08000061ff -> 0x3000001ff, Channel 2 (hash 0b110)
998 * o 0x08000071ff -> 0x3000001ff, Channel 3 (hash 0b111)
999 * o 0x10000061ff -> 0x3000001ff, Channel 4 (hash 0b110)
1000 * o 0x10000071ff -> 0x3000001ff, Channel 5 (hash 0b111)
1001 *
1002 * Based on the above you can see that we've split the 16 GiB DIMM into a 12 GiB
1003 * region (e.g. [ 0x0, 0x300000000 ), and a 4 GiB region [ 0x300000000,
1004 * 0x400000000 ). What seems to happen is that the CPU algorithmically is going
1005 * to put things in this upper range. To perform that action it goes back to the
1006 * register information that we stored in DF::Np2ChannelConfig. The way this
1007 * seems to be thought of is it wants to set the upper two bits of a 64 KiB
1008 * chunk (e.g. bits [15:14]) to 0b11 and then shift that over based on the DIMM
1009 * size.
1010 *
1011 * Our 16 GiB DIMM has 34 bits, so effectively we want to set bits [33:32] in
1012 * this case. The channel is 37 bits wide, which the CPU again knows as 2^21 *
1013 * 2^16. So it constructs the 64 KiB value of [15:14] = 0b11 and fills the rest
1014 * with zeros. It then multiplies it by 2^(21 - 3), or 2^18. The - 3 comes from
1015 * the fact that we removed 3 address bits. This when added to the above gets
 * us bits [33:32] = 0b11.
1017 *
1018 * While this appears to be the logic, I don't have a proof that this scheme
1019 * actually evenly covers the entire range, but a few examples appear to work
1020 * out.
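 *
 * As a hedged arithmetic sketch of that placement for this example:
 *
 *        uint64_t marker = 0x3ULL << 14;         // bits [15:14] = 0b11
 *        uint64_t adjust = marker << (21 - 3);   // space 0 of 21, minus 3
 *
 * This sets bits [33:32], matching the 0x3000001ff addresses above.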
1021 *
 * With this, the standard example flow that we give results in something
 * like:
1023 *
1024 * o 6-channel Zen 3, starting at address 11. 2M and 1G range enabled. Here,
1025 * we assume that the value of the NP2 space0 is 21 bits. This example
1026 * assumes we have 96 GiB total memory, which means rounding up to 128 GiB.
1027 *
1028 * Step 1 here is to adjust our address to remove the three bits indicated.
1029 * So we simply always set our new address to:
1030 *
1031 * orig[63:14] >> 3 | orig[10:0]
1032 * | +-> stays the same
1033 * +--> Relocated to bit 11 because a 6-channel config always uses 3 bits to
1034 * perform interleaving.
1035 *
1036 * At this step, one would need to consult the hash of the normalized
1037 * address before removing bits (but after adjusting for the base / DRAM
1038 * hole). If hash[2:1] == 3, then we would say that the address is actually:
1039 *
1040 * 0b11 << 32 | orig[63:14] >> 3 | orig[10:0]
1041 *
1042 *
1043 * ZEN 4 NON-POWER OF 2
1044 *
1045 * Next, we have the DFv4 versions of the 3, 5, 6, 10, and 12 channel hashing.
 * An important part of this is whether or not there is any socket hashing
 * going on. Recall that if socket hashing was going on, then it is part of the
 * interleave logic; however, if it is not, then its hash bit actually becomes
 * part of the normalized address, but not in the same spot!
1050 *
1051 * In this mode, we always remove the bits that are actually used by the hash.
1052 * Recall that some modes use hash[0], others hash[0] and hash[2], and then only
1053 * the 12-channel config uses hash[2:0]. This means we need to be careful in how
1054 * we actually remove address bits. All other bits in this lower range we end up
1055 * keeping and using. The top bits, e.g. addr[63:14] are kept and divided by the
1056 * actual channel-modulus. If we're not performing socket interleaving and
1057 * therefore need to keep the value of hash[0], then it is appended as the least
1058 * significant bit of that calculation.
1059 *
1060 * Let's look at an example of this to try to make sense of it all.
1061 *
1062 * o 6-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled.
1063 * 1-die and 2-socket interleaving.
1064 *
1065 * Here we'd start by calculating hash[2:0] as described in the earlier
1066 * interleaving situation. Because we're using a socket interleave, we will
1067 * not opt to include hash[0] in the higher-level address calculation.
1068 * Because this is a 6-channel calculation, our modulus is 3. Here, we will
1069 * strip out bits 8 and 13 (recall in the interleaving 6-channel example we
1070 * ignored hash[1], thus no bit 12 here). Our new address will be:
1071 *
1072 * (orig[63:14] / 3) >> 2 | orig[12:9] >> 1 | orig[7:0]
1073 * | | +-> stays the same
1074 * | +-> relocated to bit 8 -- shifted by 1 because
1075 * | we removed bit 8.
1076 * +--> Relocated to bit 12 -- shifted by 2 because we removed bits 8 and
1077 * 13.
1078 *
1079 * o 12-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled.
1080 * 1-die and 1-socket interleaving.
1081 *
1082 * This is a slightly different case from the above in two ways. First, we
1083 * will end up removing bits 8, 12, and 13, but then we'll also reuse
1084 * hash[0]. Our new address will be:
1085 *
1086 * ((orig[63:14] / 3) << 1 | hash[0]) >> 3 | orig[11:9] >> 1 | orig[7:0]
1087 * | | +-> stays the
1088 * | | same
1089 * | +-> relocated to bit 8 -- shifted by
1090 * | 1 because we removed bit 8.
1091 * +--> Relocated to bit 11 -- shifted by 3 because we removed bits 8, 12,
1092 * and 13.
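 *
 * A hedged C sketch of this 12-channel normalization (the function name is
 * invented; hash0 is the hash[0] bit computed as in the interleaving
 * section):
 *
 *        static uint64_t
 *        zen4_12ch_norm(uint64_t addr, uint64_t hash0)
 *        {
 *                uint64_t low = addr & 0xff;             // orig[7:0]
 *                uint64_t mid = (addr >> 9) & 0x7;       // orig[11:9]
 *                uint64_t high = (((addr >> 14) / 3) << 1) | hash0;
 *
 *                // orig[63:14]/3 plus hash[0] lands at bit 11, orig[11:9]
 *                // at bit 8.
 *                return ((high << 11) | (mid << 8) | low);
 *        }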
1093 *
1094 * DF 4D2 NPS 1K/2K NON-POWER OF 2
1095 *
1096 * Unsurprisingly, if you've followed to this point, there is a slightly
1097 * different normalization scheme that is used here. Like in the other cases we
1098 * end up breaking the address into the three parts that are used: a lower
1099 * portion that remains the same, a middle portion that is from bits that were
1100 * not used as part of the interleaving process, and the upper portion which is
1101 * where we end up with our division (like the Zen 4 case above). To add to the
1102 * fun, the upper portion that gets divided sometimes has some lower parts of
1103 * the address tossed up there.
1104 *
1105 * Because each case is unique, we have created a data table in the decoder:
1106 * zen_umc_np2_k_rules. This structure has a number of pieces that describe how
1107 * to transform the address. Logically this computation looks like:
1108 *
1109 * [ upper address / modulus ] | middle bits | low bits
1110 * | | |
1111 * | | +-> Always bits (rule start, 0]
1112 * | |
1113 * | +-> The starting bit is zukr_norm_addr. There
1114 * | are zukr_norm_naddr bits. This is:
1115 * | (zukr_norm_addr + zukr_norm_naddr,
1116 * | zukr_norm_addr].
1117 * |
1118 * +--> This has two portions: everything from (64, zukr_high] and then the
1119 * optional bonus region, which is indicated by zukr_div_addr and
1120 * zukr_div_naddr. These bits are always the low bits, meaning that the
1121 * initial bits will be shifted over by zukr_div_naddr before we
1122 * perform the division.
1123 *
1124 * Once each of these three pieces has been calculated, all the resulting pieces
1125 * will be shifted so they are contiguous, as in the other cases, as though the
1126 * removed bits didn't exist.
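 *
 * A loose sketch of that composition (the zukr_* names are the rule fields
 * described above; everything else is hypothetical):
 *
 *	low = bitx64(addr, rule_start - 1, 0);
 *	mid = bitx64(addr, zukr_norm_addr + zukr_norm_naddr - 1,
 *	    zukr_norm_addr);
 *	div = (bitx64(addr, 63, zukr_high) << zukr_div_naddr) |
 *	    bitx64(addr, zukr_div_addr + zukr_div_naddr - 1, zukr_div_addr);
 *	high = div / mod;
 *
 * with high, mid, and low then shifted so that they end up contiguous.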
1127 *
1128 *
1129 * That's most of the normalization process for the time being. We will have to
1130 * revisit this when we have to transform a normal address into a system address
1131 * and undo all this.
1132 *
1133 * -------------------------------------
1134 * Selecting a DIMM and UMC Organization
1135 * -------------------------------------
1136 *
1137 * One of the more nuanced things in decoding and encoding is the question of
1138 * where do we send a channel normalized address. That is, now that we've gotten
1139 * to a given channel, we need to transform the address into something
1140 * meaningful for a DIMM, and select a DIMM as well. The UMC SMN space contains
1141 * a number of Base Address and Mask registers which they describe as activating
1142 * a chip-select. A given UMC has up to four primary chip-selects (we'll come
1143 * back to DDR5 sub-channels later). The first two always go to the first DIMM
1144 * in the channel and the latter two always go to the second DIMM in the
1145 * channel. Put another way, you can always determine which DIMM you are
1146 * referring to by taking the chip-select number and shifting it right by 1.
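 *
 * In C terms that mapping is simply dimm_no = cs_no >> 1: chip-selects 0 and
 * 1 land on DIMM 0 while chip-selects 2 and 3 land on DIMM 1.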
1147 *
1148 * The UMC Channel registers are organized a bit differently in different
1149 * hardware generations. In a DDR5 based UMC, almost all of our settings are on
1150 * a per-chip-select basis, whereas in a DDR4 based system only the bases and
1151 * masks are. While gathering data we normalize this such that each logical
1152 * chip-select (umc_cs_t) that we have in the system has the same data so that
1153 * way DDR4 and DDR5 based systems are the same to the decoding logic. There is
1154 * also channel-wide data such as hash configurations and related.
1155 *
1156 * Each channel has a set of base and mask registers (and secondary ones as
1157 * well). To determine if we activate a given one, we first check if the
1158 * enabled bit is set. The enabled bit is set on a per-base basis, so both the
1159 * primary and secondary registers have separate enables. As there are four of
1160 * each base, mask, secondary base, and secondary mask, we say that if a
1161 * normalized address matches either a given index's primary or secondary pair,
1162 * then it activates that given UMC index. The basic formula for an enabled
1163 * selection is:
1164 *
1165 * NormAddr & ~Mask[i] == Base[i] & ~Mask[i]
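 *
 * A minimal sketch of that selection (hypothetical names; the real logic
 * also walks the secondary base/mask registers):
 *
 *	for (uint_t i = 0; i < 4; i++) {
 *		if (!cs_en[i])
 *			continue;
 *		if ((norm & ~mask[i]) == (base[i] & ~mask[i]))
 *			return (i);
 *	}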
1166 *
1167 * Once this is selected, this index in the UMC is what is always used to derive
1168 * the rest of the information that is specific to a given chip-select or DIMM.
1169 * An important thing to remember is that from this point onwards, while there
1170 * is a bunch of hashing and interleaving logic, it doesn't change which UMC
1171 * channel we read the data from; though the particular DIMM, rank, and address
1172 * we access will change as we go through hashing and interleaving.
1173 *
1174 * ------------------------
1175 * Row and Column Selection
1176 * ------------------------
1177 *
1178 * The number of bits that are used for the row and column address of a DIMM
1179 * varies based on the type of module itself. These depend on the density of a
1180 * DIMM module, e.g. how large an individual DRAM block is, a value such as 16
1181 * Gbit, and the width of each DRAM chip, which is generally phrased as X4,
1182 * X8, and X16. The memory controller encodes the number of bits (derived from
1183 * the DIMM's SPD data) and then determines which bits are used for addresses.
1184 *
1185 * Based on this information we can initially construct a row and a column
1186 * address by leveraging the information about the number of bits and then
1187 * extracting the correct bits out of the normalized channel address.
1188 *
1189 * If you've made it this far, you know nothing is quite this simple, despite it
1190 * seeming so. Importantly, not all DIMMs actually have storage that is a power
1191 * of 2. As such, there's another bit that we have to consult to transform the
1192 * actual value that we have for a row, remarkably the column somehow has no
1193 * transformations applied to it.
1194 *
1195 * The hardware gives us information on inverting the two 'most significant
1196 * bits' of the row address which we store in 'ucs_inv_msbs'. First, we have the
1197 * question of what are our most significant bits here. This is basically
1198 * determined by the number of low and high row bits. In this case higher
1199 * actually is what we want. Note, the high row bits only exist in DDR4. Next,
1200 * we need to know whether we used the primary or secondary base/mask pair for
1201 * this as there is a primary and secondary inversion bits. The higher bit of
1202 * the inversion register (e.g. ucs_inv_msbs[1]) corresponds to the highest row
1203 * bit. A zero in the bit position indicates that we should not perform an
1204 * inversion, whereas a one says that we should invert this.
1205 *
1206 * To actually make this happen we can take advantage of the fact that the
1207 * meaning of a 0/1 above means that this can be implemented with a binary
1208 * exclusive-OR (XOR). Logically speaking if we have a don't invert setting
1209 * present, a 0, then x ^ 0 is always x. However, if we have a 1 present, then
1210 * we know that (for a single bit) x ^ 1 = ~x. We take advantage of this fact in
1211 * the row logic.
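 *
 * A small sketch of that (hypothetical names): with the two most significant
 * row bits at positions msb1 > msb0, the inversion is just:
 *
 *	row ^= bitx64(inv_msbs, 1, 1) << msb1;
 *	row ^= bitx64(inv_msbs, 0, 0) << msb0;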
1212 *
1213 * ---------------------
1214 * Banks and Bank Groups
1215 * ---------------------
1216 *
1217 * While addressing within a given module is done by the use of a row and column
1218 * address, to increase storage density a module generally has a number of
1219 * banks, which may be organized into one or more bank groups. While a given
1220 * DDR4/5 access happens in some prefetched chunk of say 64 bytes (what do you
1221 * know, that's a cacheline), that all occurs within a single bank. The addition
1222 * of bank groups makes it easier to access data in parallel -- it is often
1223 * faster to read from another bank group than to read another region inside a
1224 * bank group.
1225 *
1226 * Based on the DIMMs internal configuration, there will be a specified number
1227 * of bits used for the overall bank address (including bank group bits)
1228 * followed by a number of bits actually used for bank groups. There is
1229 * separately an array of bits used to concoct the actual address. It appears,
1230 * mostly through experimental evidence, that the bank group bits occur first
1231 * and then are followed by the bank selection itself. This makes some sense if
1232 * you assume that switching bank groups is faster than switching banks.
1233 *
1234 * So if we see the UMC noting 4 bank bits and 2 bank group bits, that means
1235 * that the umc_cs_t's ucs_bank_bits[1:0] correspond to bank_group[1:0] and
1236 * ucs_bank_bits[3:2] correspond to bank_address[1:0]. However, if there were no
1237 * bank bits indicated, then all of the address bits would correspond to the
1238 * bank address.
1239 *
1240 * Now, this would all be straightforward if not for hashing, our favorite.
1241 * There are five bank hashing registers per channel (UMC_BANK_HASH_DDR4,
1242 * UMC_BANK_HASH_DDR5), one for each of the five possible bank bits. To
1243 * do this we need to use the calculated row and column that we previously
1244 * determined. This calculation happens in a few steps:
1245 *
1246 * 1) First check if the enable bit is set in the rule. If not, just use the
1247 * normal bank address bit and we're done.
1248 * 2) Take a bitwise-AND of the calculated row and hash register's row value.
1249 * Next do the same thing for the column.
1250 * 3) For each bit in the row, progressively XOR it, e.g. row[0] ^ row[1] ^
1251 * row[2] ^ ... to calculate a net bit value for the row. This then
1252 * repeats itself for the column. What basically has happened is that we're
1253 * using the hash register to select which bits impact our decision.
1254 * Think of this as a traditional bitwise functional reduce.
1255 * 4) XOR the combined row bit with the column bit and the actual bank
1256 * address bit from the normalized address. So if this were bank bit 0,
1257 * which indicated we should use bit 15 for bank[0], then we would
1258 * ultimately say our new bit is norm_addr[15] ^ row_xor ^ col_xor
1259 *
1260 * An important caveat is that we would only consult all this if we actually
1261 * were told that the bank bit was being used. For example if we had 3 bank
1262 * bits, then we'd only check the first 3 hash registers. The latter two would
1263 * be ignored.
1264 *
1265 * Once this process is done, then we can go back and split the activated bank
1266 * into the actual bank used and the bank group used based on the first bits
1267 * going to the bank group.
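 *
 * Pulling the steps together in a loose C sketch (hypothetical names;
 * parity64() stands in for the running XOR reduction in step 3):
 *
 *	for (uint_t i = 0; i < nbank_bits; i++) {
 *		uint8_t bit = bitx64(norm, bank_bits[i], bank_bits[i]);
 *		if (hash[i].en) {
 *			bit ^= parity64(row & hash[i].row);
 *			bit ^= parity64(col & hash[i].col);
 *		}
 *		bank |= (uint32_t)bit << i;
 *	}
 *	bank_group = bank & ((1U << nbank_grp_bits) - 1);
 *	bank_addr = bank >> nbank_grp_bits;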
1268 *
1269 * ----------------
1270 * DDR5 Sub-channel
1271 * ----------------
1272 *
1273 * As described in the definitions section, DDR5 has the notion of a
1274 * sub-channel. Here, a single bit is used to determine which of the
1275 * sub-channels to actually operate and utilize. Importantly the same
1276 * chip-select seems to apply to both halves of a given sub-channel.
1277 *
1278 * There is also a hash that is used here. The hash here utilizes the calculated
1279 * bank, column, and row and follows the same pattern used in the bank
1280 * calculation where we do a bunch of running exclusive-ORs and then do that
1281 * with the original value we found to get the new value. Because there's only
1282 * one bit for the sub-channel, we only have a single hash to consider.
1283 *
1284 * -------------------------------------------
1285 * Ranks, Chip-Select, and Rank Multiplication
1286 * -------------------------------------------
1287 *
1288 * The notion of ranks and the chip-select are interwoven. From a strict DDR4
1289 * RDIMM perspective, there are two lines that are dedicated for chip-selects
1290 * and then another two that are shared with three 'chip-id' bits that are used
1291 * in 3DS RDIMMs. In all cases the controller starts with two logical chip
1292 * selects and then uses something called rank multiplication to figure out how
1293 * to multiplex that and map to the broader set of things. Basically, in
1294 * reality, DDR4 RDIMMs allow for 4 bits to determine a rank and then 3DS RDIMMs
1295 * use 2 bits for a rank and 3 bits to select a stacked chip. In DDR5 this is
1296 * different and you just have 2 bits for a rank.
1297 *
1298 * It's not entirely clear from what we know from AMD, but it seems that we use
1299 * the RM bits as a way to basically go beyond the basic 2 bits of chip-select
1300 * which is determined based on which channel we logically activate. Initially
1301 * we treat this as two distinct things, here as that's what we get from the
1302 * hardware. There are two hashes here: a chip-select and a rank-multiplication
1303 * hash. Unlike the others, which rely on the bank, row, and column addresses,
1304 * this hash relies on the normalized address. So we calculate that mask and do
1305 * our same xor dance.
1306 *
1307 * There is one hash for each rank multiplication bit and chip-select bit. The
1308 * number of rank multiplication bits is given to us. The number of chip-select
1309 * bits is fixed, it's simply two because there are four base/mask registers and
1310 * logical chip-selects in a given UMC channel. The chip-select on some DDR5
1311 * platforms has a secondary exclusive-OR hash that can be applied. As this only
1312 * exists in some families, for any where it does not exist, we seed it to be
1313 * zero so that it becomes a no-op.
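 *
 * A brief sketch of one such bit (hypothetical names), using the same
 * reduction as the bank hash but fed by the normalized address:
 *
 *	cs_bit ^= parity64(norm & cs_hash[i].addr_mask);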
1314 *
1315 * -----------
1316 * Future Work
1317 * -----------
1318 *
1319 * As the road goes ever on and on, down from the door where it began, there are
1320 * still some stops on the journey for this driver. In particular, here are the
1321 * major open areas that could be implemented to extend what this can do:
1322 *
1323 * o The ability to transform a normalized channel address back to a system
1324 * address. This is required for MCA/MCA-X error handling as those generally
1325 * work in terms of channel addresses.
1326 * o Integrating with the MCA/MCA-X error handling paths so that way we can
1327 * take correct action in the face of ECC errors and allowing recovery from
1328 * uncorrectable errors.
1329 * o Providing memory controller information to FMA so that way it can opt to
1330 * do predictive failure or give us more information about what is at fault
1331 * with ECC errors.
1332 * o Figuring out if we will get MCEs for privileged address decoding and if
1333 * so mapping those back to system addresses and related.
1334 * o 3DS RDIMMs likely will need a little bit of work to ensure we're handling
1335 * the resulting combination of the RM bits and CS and reporting it
1336 * intelligently.
1337 * o Support for the MI300-specific interleave decoding.
1338 * o Understanding the error flow for CXL related address decoding and if we
1339 * should support it in this driver.
1340 */
1341
1342 #include <sys/types.h>
1343 #include <sys/file.h>
1344 #include <sys/errno.h>
1345 #include <sys/open.h>
1346 #include <sys/cred.h>
1347 #include <sys/ddi.h>
1348 #include <sys/sunddi.h>
1349 #include <sys/stat.h>
1350 #include <sys/conf.h>
1351 #include <sys/devops.h>
1352 #include <sys/cmn_err.h>
1353 #include <sys/x86_archext.h>
1354 #include <sys/sysmacros.h>
1355 #include <sys/mc.h>
1356
1357 #include <zen_umc.h>
1358 #include <sys/amdzen/df.h>
1359 #include <sys/amdzen/umc.h>
1360
1361 static zen_umc_t *zen_umc;
1362
1363 /*
1364 * Per-CPU family information that describes the set of capabilities that they
1365 * implement. When adding support for new CPU generations, you must go through
1366 * what documentation you have and validate these. The best bet is to find a
1367 * similar processor and see what has changed. Unfortunately, there really isn't
1368 * a substitute for just basically checking every register. The family name
1369 * comes from amdzen_c_family(). One additional note for new CPUs: if our
1370 * parent amdzen nexus driver does not attach (because the DF has changed PCI
1371 * IDs or more), then just adding something here will not be sufficient to make
1372 * it work.
1373 */
1374 static const zen_umc_fam_data_t zen_umc_fam_data[] = {
1375 {
1376 .zufd_family = X86_PF_AMD_NAPLES,
1377 .zufd_dram_nrules = 16,
1378 .zufd_cs_nrules = 2,
1379 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4,
1380 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS,
1381 .zufd_base_instid = 0
1382 }, {
1383 .zufd_family = X86_PF_HYGON_DHYANA,
1384 .zufd_dram_nrules = 16,
1385 .zufd_cs_nrules = 2,
1386 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4,
1387 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS,
1388 .zufd_base_instid = 0
1389 }, {
1390 .zufd_family = X86_PF_AMD_DALI,
1391 .zufd_dram_nrules = 2,
1392 .zufd_cs_nrules = 2,
1393 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4_APU,
1394 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS,
1395 .zufd_base_instid = 0
1396 }, {
1397 .zufd_family = X86_PF_AMD_ROME,
1398 .zufd_flags = ZEN_UMC_FAM_F_NP2 | ZEN_UMC_FAM_F_NORM_HASH |
1399 ZEN_UMC_FAM_F_UMC_HASH,
1400 .zufd_dram_nrules = 16,
1401 .zufd_cs_nrules = 2,
1402 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4,
1403 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1404 UMC_CHAN_HASH_F_CS,
1405 .zufd_base_instid = 0
1406 }, {
1407 .zufd_family = X86_PF_AMD_RENOIR,
1408 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH,
1409 .zufd_dram_nrules = 2,
1410 .zufd_cs_nrules = 2,
1411 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4_APU,
1412 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_PC |
1413 UMC_CHAN_HASH_F_CS,
1414 .zufd_base_instid = 0
1415 }, {
1416 .zufd_family = X86_PF_AMD_MATISSE,
1417 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH | ZEN_UMC_FAM_F_UMC_HASH,
1418 .zufd_dram_nrules = 16,
1419 .zufd_cs_nrules = 2,
1420 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4,
1421 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1422 UMC_CHAN_HASH_F_CS,
1423 .zufd_base_instid = 0
1424 }, {
1425 .zufd_family = X86_PF_AMD_VAN_GOGH,
1426 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH,
1427 .zufd_dram_nrules = 2,
1428 .zufd_cs_nrules = 2,
1429 .zufd_umc_style = ZEN_UMC_UMC_S_HYBRID_LPDDR5,
1430 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS,
1431 .zufd_base_instid = 0
1432 }, {
1433 .zufd_family = X86_PF_AMD_MENDOCINO,
1434 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH,
1435 .zufd_dram_nrules = 2,
1436 .zufd_cs_nrules = 2,
1437 .zufd_umc_style = ZEN_UMC_UMC_S_HYBRID_LPDDR5,
1438 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS,
1439 .zufd_base_instid = 0
1440 }, {
1441 .zufd_family = X86_PF_AMD_MILAN,
1442 .zufd_flags = ZEN_UMC_FAM_F_TARG_REMAP | ZEN_UMC_FAM_F_NP2 |
1443 ZEN_UMC_FAM_F_NORM_HASH | ZEN_UMC_FAM_F_UMC_HASH,
1444 .zufd_dram_nrules = 16,
1445 .zufd_cs_nrules = 2,
1446 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4,
1447 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1448 UMC_CHAN_HASH_F_CS,
1449 .zufd_base_instid = 0
1450 }, {
1451 .zufd_family = X86_PF_AMD_GENOA,
1452 .zufd_flags = ZEN_UMC_FAM_F_TARG_REMAP |
1453 ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR |
1454 ZEN_UMC_FAM_F_CS_XOR,
1455 .zufd_dram_nrules = 20,
1456 .zufd_cs_nrules = 4,
1457 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5,
1458 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1459 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS,
1460 .zufd_base_instid = 0
1461 }, {
1462 .zufd_family = X86_PF_AMD_VERMEER,
1463 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH | ZEN_UMC_FAM_F_UMC_HASH,
1464 .zufd_dram_nrules = 16,
1465 .zufd_cs_nrules = 2,
1466 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4,
1467 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1468 UMC_CHAN_HASH_F_CS,
1469 .zufd_base_instid = 0
1470 }, {
1471 .zufd_family = X86_PF_AMD_REMBRANDT,
1472 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH,
1473 .zufd_dram_nrules = 2,
1474 .zufd_cs_nrules = 2,
1475 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU,
1476 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS,
1477 .zufd_base_instid = 0
1478 }, {
1479 .zufd_family = X86_PF_AMD_CEZANNE,
1480 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH,
1481 .zufd_dram_nrules = 2,
1482 .zufd_cs_nrules = 2,
1483 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4_APU,
1484 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_PC |
1485 UMC_CHAN_HASH_F_CS,
1486 .zufd_base_instid = 0
1487 }, {
1488 .zufd_family = X86_PF_AMD_RAPHAEL,
1489 .zufd_flags = ZEN_UMC_FAM_F_CS_XOR,
1490 .zufd_dram_nrules = 2,
1491 .zufd_cs_nrules = 2,
1492 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5,
1493 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1494 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS,
1495 .zufd_base_instid = 0
1496 }, {
1497 .zufd_family = X86_PF_AMD_BERGAMO,
1498 .zufd_flags = ZEN_UMC_FAM_F_TARG_REMAP |
1499 ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR |
1500 ZEN_UMC_FAM_F_CS_XOR,
1501 .zufd_dram_nrules = 20,
1502 .zufd_cs_nrules = 4,
1503 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5,
1504 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1505 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS,
1506 .zufd_base_instid = 0
1507 }, {
1508 .zufd_family = X86_PF_AMD_PHOENIX,
1509 .zufd_flags = ZEN_UMC_FAM_F_CS_XOR,
1510 .zufd_dram_nrules = 2,
1511 .zufd_cs_nrules = 2,
1512 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU,
1513 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS,
1514 .zufd_base_instid = 0
1515 }, {
1516 .zufd_family = X86_PF_AMD_STRIX,
1517 .zufd_flags = ZEN_UMC_FAM_F_CS_XOR,
1518 .zufd_dram_nrules = 2,
1519 .zufd_cs_nrules = 2,
1520 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU,
1521 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS,
1522 .zufd_base_instid = 0
1523 }, {
1524 .zufd_family = X86_PF_AMD_KRACKAN,
1525 .zufd_flags = ZEN_UMC_FAM_F_CS_XOR,
1526 .zufd_dram_nrules = 2,
1527 .zufd_cs_nrules = 2,
1528 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU,
1529 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS,
1530 .zufd_base_instid = 0
1531 }, {
1532 .zufd_family = X86_PF_AMD_STRIX_HALO,
1533 .zufd_flags = ZEN_UMC_FAM_F_CS_XOR,
1534 .zufd_dram_nrules = 3,
1535 .zufd_cs_nrules = 3,
1536 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU,
1537 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS,
1538 .zufd_base_instid = 0
1539 }, {
1540 .zufd_family = X86_PF_AMD_GRANITE_RIDGE,
1541 .zufd_flags = ZEN_UMC_FAM_F_CS_XOR,
1542 .zufd_dram_nrules = 2,
1543 .zufd_cs_nrules = 2,
1544 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5,
1545 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1546 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS,
1547 .zufd_base_instid = 0
1548 }, {
1549 .zufd_family = X86_PF_AMD_TURIN,
1550 .zufd_flags = ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR |
1551 ZEN_UMC_FAM_F_CS_XOR,
1552 .zufd_dram_nrules = 20,
1553 .zufd_cs_nrules = 4,
1554 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5,
1555 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1556 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS,
1557 .zufd_base_instid = 0
1558 }, {
1559 .zufd_family = X86_PF_AMD_DENSE_TURIN,
1560 .zufd_flags = ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR |
1561 ZEN_UMC_FAM_F_CS_XOR,
1562 .zufd_dram_nrules = 20,
1563 .zufd_cs_nrules = 4,
1564 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5,
1565 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1566 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS,
1567 .zufd_base_instid = 0
1568 }
1569 };
1570
1571 /*
1572 * We use this for the DDR4 and Hybrid DDR4 + LPDDR5 tables to map between the
1573 * specific enumerated speeds which are encoded values and the corresponding
1574 * memory clock and speed. For all DDR4 and LPDDR5 items we assume a 1:2 ratio
1575 * between them. This is not used for the pure DDR5 / LPDDR5 entries because of
1576 * how the register just encodes the raw value in MHz.
1577 */
1578 typedef struct zen_umc_freq_map {
1579 uint32_t zufm_reg;
1580 uint32_t zufm_mhz;
1581 uint32_t zufm_mts2;
1582 uint32_t zufm_mts4;
1583 } zen_umc_freq_map_t;
1584
1585 static const zen_umc_freq_map_t zen_umc_ddr4_map[] = {
1586 { UMC_DRAMCFG_DDR4_MEMCLK_667, 667, 1333, 0 },
1587 { UMC_DRAMCFG_DDR4_MEMCLK_800, 800, 1600, 0 },
1588 { UMC_DRAMCFG_DDR4_MEMCLK_933, 933, 1866, 0 },
1589 { UMC_DRAMCFG_DDR4_MEMCLK_1067, 1067, 2133, 0 },
1590 { UMC_DRAMCFG_DDR4_MEMCLK_1200, 1200, 2400, 0 },
1591 { UMC_DRAMCFG_DDR4_MEMCLK_1333, 1333, 2666, 0 },
1592 { UMC_DRAMCFG_DDR4_MEMCLK_1467, 1467, 2933, 0 },
1593 { UMC_DRAMCFG_DDR4_MEMCLK_1600, 1600, 3200, 0 }
1594 };
1595
1596 static const zen_umc_freq_map_t zen_umc_lpddr5_map[] = {
1597 { UMC_DRAMCFG_HYB_MEMCLK_333, 333, 667, 1333 },
1598 { UMC_DRAMCFG_HYB_MEMCLK_400, 400, 800, 1600 },
1599 { UMC_DRAMCFG_HYB_MEMCLK_533, 533, 1066, 2133 },
1600 { UMC_DRAMCFG_HYB_MEMCLK_687, 687, 1375, 2750 },
1601 { UMC_DRAMCFG_HYB_MEMCLK_750, 750, 1500, 3000 },
1602 { UMC_DRAMCFG_HYB_MEMCLK_800, 800, 1600, 3200 },
1603 { UMC_DRAMCFG_HYB_MEMCLK_933, 933, 1866, 3733 },
1604 { UMC_DRAMCFG_HYB_MEMCLK_1066, 1066, 2133, 4267 },
1605 { UMC_DRAMCFG_HYB_MEMCLK_1200, 1200, 2400, 4800 },
1606 { UMC_DRAMCFG_HYB_MEMCLK_1375, 1375, 2750, 5500 },
1607 { UMC_DRAMCFG_HYB_MEMCLK_1500, 1500, 3000, 6000 },
1608 { UMC_DRAMCFG_HYB_MEMCLK_1600, 1600, 3200, 6400 }
1610 };
1611
1612 static boolean_t
1613 zen_umc_identify(zen_umc_t *umc)
1614 {
1615 for (uint_t i = 0; i < ARRAY_SIZE(zen_umc_fam_data); i++) {
1616 if (zen_umc_fam_data[i].zufd_family == umc->umc_family) {
1617 umc->umc_fdata = &zen_umc_fam_data[i];
1618 return (B_TRUE);
1619 }
1620 }
1621
1622 return (B_FALSE);
1623 }
1624
1625 /*
1626 * This operates on DFv2, DFv3, and DFv3.5 DRAM rules, which generally speaking
1627 * are in similar register locations and meanings, but the size of bits in
1628 * memory is not consistent.
1629 */
1630 static int
1631 zen_umc_read_dram_rule_df_23(zen_umc_t *umc, const uint_t dfno,
1632 const uint_t inst, const uint_t ruleno, df_dram_rule_t *rule)
1633 {
1634 int ret;
1635 uint32_t base, limit;
1636 uint64_t dbase, dlimit;
1637 uint16_t addr_ileave, chan_ileave, sock_ileave, die_ileave, dest;
1638 boolean_t hash = B_FALSE;
1639 zen_umc_df_t *df = &umc->umc_dfs[dfno];
1640
1641 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_BASE_V2(ruleno),
1642 &base)) != 0) {
1643 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM base "
1644 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1645 return (ret);
1646 }
1647
1648 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_LIMIT_V2(ruleno),
1649 &limit)) != 0) {
1650 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM limit "
1651 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1652 return (ret);
1653 }
1654
1656 rule->ddr_raw_base = base;
1657 rule->ddr_raw_limit = limit;
1658 rule->ddr_raw_ileave = rule->ddr_raw_ctrl = 0;
1659
1660 if (!DF_DRAM_BASE_V2_GET_VALID(base)) {
1661 return (0);
1662 }
1663
1664 /*
1665 * Extract all values from the registers and then normalize. While there
1666 * are often different bit patterns for the values, the interpretation
1667 * is the same across all the Zen 1-3 parts. That is while which bits
1668 * may be used for say channel interleave vary, the values of them are
1669 * consistent.
1670 */
1671 rule->ddr_flags |= DF_DRAM_F_VALID;
1672 if (DF_DRAM_BASE_V2_GET_HOLE_EN(base)) {
1673 rule->ddr_flags |= DF_DRAM_F_HOLE;
1674 }
1675
1676 dbase = DF_DRAM_BASE_V2_GET_BASE(base);
1677 dlimit = DF_DRAM_LIMIT_V2_GET_LIMIT(limit);
1678 switch (umc->umc_df_rev) {
1679 case DF_REV_2:
1680 addr_ileave = DF_DRAM_BASE_V2_GET_ILV_ADDR(base);
1681 chan_ileave = DF_DRAM_BASE_V2_GET_ILV_CHAN(base);
1682 die_ileave = DF_DRAM_LIMIT_V2_GET_ILV_DIE(limit);
1683 sock_ileave = DF_DRAM_LIMIT_V2_GET_ILV_SOCK(limit);
1684 dest = DF_DRAM_LIMIT_V2_GET_DEST_ID(limit);
1685 break;
1686 case DF_REV_3:
1687 addr_ileave = DF_DRAM_BASE_V3_GET_ILV_ADDR(base);
1688 sock_ileave = DF_DRAM_BASE_V3_GET_ILV_SOCK(base);
1689 die_ileave = DF_DRAM_BASE_V3_GET_ILV_DIE(base);
1690 chan_ileave = DF_DRAM_BASE_V3_GET_ILV_CHAN(base);
1691 dest = DF_DRAM_LIMIT_V3_GET_DEST_ID(limit);
1692 break;
1693 case DF_REV_3P5:
1694 addr_ileave = DF_DRAM_BASE_V3P5_GET_ILV_ADDR(base);
1695 sock_ileave = DF_DRAM_BASE_V3P5_GET_ILV_SOCK(base);
1696 die_ileave = DF_DRAM_BASE_V3P5_GET_ILV_DIE(base);
1697 chan_ileave = DF_DRAM_BASE_V3P5_GET_ILV_CHAN(base);
1698 dest = DF_DRAM_LIMIT_V3P5_GET_DEST_ID(limit);
1699 break;
1700 default:
1701 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported "
1702 "DF revision processing DRAM rules: 0x%x", umc->umc_df_rev);
1703 return (-1);
1704 }
1705
1706 rule->ddr_base = dbase << DF_DRAM_BASE_V2_BASE_SHIFT;
1707 rule->ddr_sock_ileave_bits = sock_ileave;
1708 rule->ddr_die_ileave_bits = die_ileave;
1709 switch (addr_ileave) {
1710 case DF_DRAM_ILV_ADDR_8:
1711 case DF_DRAM_ILV_ADDR_9:
1712 case DF_DRAM_ILV_ADDR_10:
1713 case DF_DRAM_ILV_ADDR_11:
1714 case DF_DRAM_ILV_ADDR_12:
1715 break;
1716 default:
1717 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid address "
1718 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno,
1719 dfno, inst, addr_ileave);
1720 return (EINVAL);
1721 }
1722 rule->ddr_addr_start = DF_DRAM_ILV_ADDR_BASE + addr_ileave;
1723
1724 switch (chan_ileave) {
1725 case DF_DRAM_BASE_V2_ILV_CHAN_1:
1726 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_1CH;
1727 break;
1728 case DF_DRAM_BASE_V2_ILV_CHAN_2:
1729 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_2CH;
1730 break;
1731 case DF_DRAM_BASE_V2_ILV_CHAN_4:
1732 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_4CH;
1733 break;
1734 case DF_DRAM_BASE_V2_ILV_CHAN_8:
1735 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_8CH;
1736 break;
1737 case DF_DRAM_BASE_V2_ILV_CHAN_6:
1738 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_6CH;
1739 break;
1740 case DF_DRAM_BASE_V2_ILV_CHAN_COD4_2:
1741 hash = B_TRUE;
1742 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD4_2CH;
1743 break;
1744 case DF_DRAM_BASE_V2_ILV_CHAN_COD2_4:
1745 hash = B_TRUE;
1746 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD2_4CH;
1747 break;
1748 case DF_DRAM_BASE_V2_ILV_CHAN_COD1_8:
1749 hash = B_TRUE;
1750 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD1_8CH;
1751 break;
1752 default:
1753 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid channel "
1754 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno,
1755 dfno, inst, chan_ileave);
1756 return (EINVAL);
1757 }
1758
1759 /*
1760 * If hashing is enabled, note which hashing rules apply to this
1761 * address. This is done to smooth over the differences between DFv3 and
1762 * DFv4, where the flags are in the rules themselves in the latter, but
1763 * global today.
1764 */
1765 if (hash) {
1766 if ((df->zud_flags & ZEN_UMC_DF_F_HASH_16_18) != 0) {
1767 rule->ddr_flags |= DF_DRAM_F_HASH_16_18;
1768 }
1769
1770 if ((df->zud_flags & ZEN_UMC_DF_F_HASH_21_23) != 0) {
1771 rule->ddr_flags |= DF_DRAM_F_HASH_21_23;
1772 }
1773
1774 if ((df->zud_flags & ZEN_UMC_DF_F_HASH_30_32) != 0) {
1775 rule->ddr_flags |= DF_DRAM_F_HASH_30_32;
1776 }
1777 }
1778
1779 /*
1780 * While DFv4 makes remapping explicit, it is basically always enabled
1781 * and used on supported platforms prior to that point. So flag such
1782 * supported platforms as ones that need to do this. On those systems
1783 * there is only one set of remap rules for an entire DF that are
1784 * determined based on the target socket. To indicate that we use the
1785 * DF_DRAM_F_REMAP_SOCK flag below and skip setting a remap target.
1786 */
1787 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_TARG_REMAP) != 0) {
1788 rule->ddr_flags |= DF_DRAM_F_REMAP_EN | DF_DRAM_F_REMAP_SOCK;
1789 }
1790
1791 rule->ddr_limit = (dlimit << DF_DRAM_LIMIT_V2_LIMIT_SHIFT) +
1792 DF_DRAM_LIMIT_V2_LIMIT_EXCL;
1793 rule->ddr_dest_fabid = dest;
1794
1795 return (0);
1796 }
1797
1798 static int
1799 zen_umc_read_dram_rule_df_4(zen_umc_t *umc, const uint_t dfno,
1800 const uint_t inst, const uint_t ruleno, df_dram_rule_t *rule)
1801 {
1802 int ret;
1803 uint16_t addr_ileave;
1804 uint32_t base, limit, ilv, ctl;
1805
1806 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_BASE_V4(ruleno),
1807 &base)) != 0) {
1808 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM base "
1809 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1810 return (ret);
1811 }
1812
1813 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_LIMIT_V4(ruleno),
1814 &limit)) != 0) {
1815 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM limit "
1816 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1817 return (ret);
1818 }
1819
1820 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_ILV_V4(ruleno),
1821 &ilv)) != 0) {
1822 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM "
1823 "interleave register %u on 0x%x/0x%x: %d", ruleno, dfno,
1824 inst, ret);
1825 return (ret);
1826 }
1827
1828 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_CTL_V4(ruleno),
1829 &ctl)) != 0) {
1830 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM control "
1831 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1832 return (ret);
1833 }
1834
1835 rule->ddr_raw_base = base;
1836 rule->ddr_raw_limit = limit;
1837 rule->ddr_raw_ileave = ilv;
1838 rule->ddr_raw_ctrl = ctl;
1839
1840 if (!DF_DRAM_CTL_V4_GET_VALID(ctl)) {
1841 return (0);
1842 }
1843
1844 rule->ddr_flags |= DF_DRAM_F_VALID;
1845 rule->ddr_base = DF_DRAM_BASE_V4_GET_ADDR(base);
1846 rule->ddr_base = rule->ddr_base << DF_DRAM_BASE_V4_BASE_SHIFT;
1847 rule->ddr_limit = DF_DRAM_LIMIT_V4_GET_ADDR(limit);
1848 rule->ddr_limit = (rule->ddr_limit << DF_DRAM_LIMIT_V4_LIMIT_SHIFT) +
1849 DF_DRAM_LIMIT_V4_LIMIT_EXCL;
1850 rule->ddr_dest_fabid = DF_DRAM_CTL_V4_GET_DEST_ID(ctl);
1851
1852 if (DF_DRAM_CTL_V4_GET_HASH_1G(ctl) != 0) {
1853 rule->ddr_flags |= DF_DRAM_F_HASH_30_32;
1854 }
1855
1856 if (DF_DRAM_CTL_V4_GET_HASH_2M(ctl) != 0) {
1857 rule->ddr_flags |= DF_DRAM_F_HASH_21_23;
1858 }
1859
1860 if (DF_DRAM_CTL_V4_GET_HASH_64K(ctl) != 0) {
1861 rule->ddr_flags |= DF_DRAM_F_HASH_16_18;
1862 }
1863
1864 if (DF_DRAM_CTL_V4_GET_REMAP_EN(ctl) != 0) {
1865 rule->ddr_flags |= DF_DRAM_F_REMAP_EN;
1866 rule->ddr_remap_ent = DF_DRAM_CTL_V4_GET_REMAP_SEL(ctl);
1867 }
1868
1869 if (DF_DRAM_CTL_V4_GET_HOLE_EN(ctl) != 0) {
1870 rule->ddr_flags |= DF_DRAM_F_HOLE;
1871 }
1872
1873 if (DF_DRAM_CTL_V4_GET_SCM(ctl) != 0) {
1874 rule->ddr_flags |= DF_DRAM_F_SCM;
1875 }
1876
1877 rule->ddr_sock_ileave_bits = DF_DRAM_ILV_V4_GET_SOCK(ilv);
1878 rule->ddr_die_ileave_bits = DF_DRAM_ILV_V4_GET_DIE(ilv);
1879 switch (DF_DRAM_ILV_V4_GET_CHAN(ilv)) {
1880 case DF_DRAM_ILV_V4_CHAN_1:
1881 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_1CH;
1882 break;
1883 case DF_DRAM_ILV_V4_CHAN_2:
1884 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_2CH;
1885 break;
1886 case DF_DRAM_ILV_V4_CHAN_4:
1887 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_4CH;
1888 break;
1889 case DF_DRAM_ILV_V4_CHAN_8:
1890 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_8CH;
1891 break;
1892 case DF_DRAM_ILV_V4_CHAN_16:
1893 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_16CH;
1894 break;
1895 case DF_DRAM_ILV_V4_CHAN_32:
1896 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_32CH;
1897 break;
1898 case DF_DRAM_ILV_V4_CHAN_NPS4_2CH:
1899 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_2CH;
1900 break;
1901 case DF_DRAM_ILV_V4_CHAN_NPS2_4CH:
1902 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH;
1903 break;
1904 case DF_DRAM_ILV_V4_CHAN_NPS1_8CH:
1905 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH;
1906 break;
1907 case DF_DRAM_ILV_V4_CHAN_NPS4_3CH:
1908 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_3CH;
1909 break;
1910 case DF_DRAM_ILV_V4_CHAN_NPS2_6CH:
1911 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_6CH;
1912 break;
1913 case DF_DRAM_ILV_V4_CHAN_NPS1_12CH:
1914 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_12CH;
1915 break;
1916 case DF_DRAM_ILV_V4_CHAN_NPS2_5CH:
1917 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_5CH;
1918 break;
1919 case DF_DRAM_ILV_V4_CHAN_NPS1_10CH:
1920 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_10CH;
1921 break;
1922 default:
1923 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid channel "
1924 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno,
1925 dfno, inst, DF_DRAM_ILV_V4_GET_CHAN(ilv));
1927 break;
1928 }
1929
1930 addr_ileave = DF_DRAM_ILV_V4_GET_ADDR(ilv);
1931 switch (addr_ileave) {
1932 case DF_DRAM_ILV_ADDR_8:
1933 case DF_DRAM_ILV_ADDR_9:
1934 case DF_DRAM_ILV_ADDR_10:
1935 case DF_DRAM_ILV_ADDR_11:
1936 case DF_DRAM_ILV_ADDR_12:
1937 break;
1938 default:
1939 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid address "
1940 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno,
1941 dfno, inst, addr_ileave);
1942 return (EINVAL);
1943 }
1944 rule->ddr_addr_start = DF_DRAM_ILV_ADDR_BASE + addr_ileave;
1945
1946 return (0);
1947 }
1948
1949 static int
1950 zen_umc_read_dram_rule_df_4d2(zen_umc_t *umc, const uint_t dfno,
1951 const uint_t inst, const uint_t ruleno, df_dram_rule_t *rule)
1952 {
1953 int ret;
1954 uint16_t addr_ileave;
1955 uint32_t base, limit, ilv, ctl;
1956
1957 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_BASE_V4D2(ruleno),
1958 &base)) != 0) {
1959 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM base "
1960 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1961 return (ret);
1962 }
1963
1964 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_LIMIT_V4D2(ruleno),
1965 &limit)) != 0) {
1966 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM limit "
1967 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1968 return (ret);
1969 }
1970
1971 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_ILV_V4D2(ruleno),
1972 &ilv)) != 0) {
1973 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM "
1974 "interleave register %u on 0x%x/0x%x: %d", ruleno, dfno,
1975 inst, ret);
1976 return (ret);
1977 }
1978
1979 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_CTL_V4D2(ruleno),
1980 &ctl)) != 0) {
1981 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM control "
1982 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1983 return (ret);
1984 }
1985
1986 rule->ddr_raw_base = base;
1987 rule->ddr_raw_limit = limit;
1988 rule->ddr_raw_ileave = ilv;
1989 rule->ddr_raw_ctrl = ctl;
1990
1991 if (!DF_DRAM_CTL_V4_GET_VALID(ctl)) {
1992 return (0);
1993 }
1994
1995 rule->ddr_flags |= DF_DRAM_F_VALID;
1996 rule->ddr_base = DF_DRAM_BASE_V4_GET_ADDR(base);
1997 rule->ddr_base = rule->ddr_base << DF_DRAM_BASE_V4_BASE_SHIFT;
1998 rule->ddr_limit = DF_DRAM_LIMIT_V4_GET_ADDR(limit);
1999 rule->ddr_limit = (rule->ddr_limit << DF_DRAM_LIMIT_V4_LIMIT_SHIFT) +
2000 DF_DRAM_LIMIT_V4_LIMIT_EXCL;
2001 rule->ddr_dest_fabid = DF_DRAM_CTL_V4D2_GET_DEST_ID(ctl);
2002
2003 if (DF_DRAM_CTL_V4D2_GET_HASH_1T(ctl) != 0) {
2004 rule->ddr_flags |= DF_DRAM_F_HASH_40_42;
2005 }
2006
2007 if (DF_DRAM_CTL_V4_GET_HASH_1G(ctl) != 0) {
2008 rule->ddr_flags |= DF_DRAM_F_HASH_30_32;
2009 }
2010
2011 if (DF_DRAM_CTL_V4_GET_HASH_2M(ctl) != 0) {
2012 rule->ddr_flags |= DF_DRAM_F_HASH_21_23;
2013 }
2014
2015 if (DF_DRAM_CTL_V4_GET_HASH_64K(ctl) != 0) {
2016 rule->ddr_flags |= DF_DRAM_F_HASH_16_18;
2017 }
2018
2019 if (DF_DRAM_CTL_V4D2_GET_HASH_4K(ctl) != 0) {
2020 rule->ddr_flags |= DF_DRAM_F_HASH_12_14;
2021 }
2022
2023 if (DF_DRAM_CTL_V4_GET_REMAP_EN(ctl) != 0) {
2024 rule->ddr_flags |= DF_DRAM_F_REMAP_EN;
2025 rule->ddr_remap_ent = DF_DRAM_CTL_V4D2_GET_REMAP_SEL(ctl);
2026 }
2027
2028 if (DF_DRAM_CTL_V4_GET_HOLE_EN(ctl) != 0) {
2029 rule->ddr_flags |= DF_DRAM_F_HOLE;
2030 }
2031
2032 if (DF_DRAM_CTL_V4_GET_SCM(ctl) != 0) {
2033 rule->ddr_flags |= DF_DRAM_F_SCM;
2034 }
2035
2036 rule->ddr_sock_ileave_bits = DF_DRAM_ILV_V4_GET_SOCK(ilv);
2037 rule->ddr_die_ileave_bits = DF_DRAM_ILV_V4_GET_DIE(ilv);
2038 switch (DF_DRAM_ILV_V4D2_GET_CHAN(ilv)) {
2039 case DF_DRAM_ILV_V4D2_CHAN_1:
2040 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_1CH;
2041 break;
2042 case DF_DRAM_ILV_V4D2_CHAN_2:
2043 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_2CH;
2044 break;
2045 case DF_DRAM_ILV_V4D2_CHAN_4:
2046 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_4CH;
2047 break;
2048 case DF_DRAM_ILV_V4D2_CHAN_8:
2049 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_8CH;
2050 break;
2051 case DF_DRAM_ILV_V4D2_CHAN_16:
2052 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_16CH;
2053 break;
2054 case DF_DRAM_ILV_V4D2_CHAN_32:
2055 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_32CH;
2056 break;
2057 case DF_DRAM_ILV_V4D2_CHAN_NPS1_16S8CH_1K:
2058 if (rule->ddr_sock_ileave_bits == 0) {
2059 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_16CH_1K;
2060 } else {
2061 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH_1K;
2062 }
2063 break;
2064 case DF_DRAM_ILV_V4D2_CHAN_NPS0_24CH_1K:
2065 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS0_24CH_1K;
2066 break;
2067 case DF_DRAM_ILV_V4D2_CHAN_NPS4_2CH_1K:
2068 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_2CH_1K;
2069 break;
2070 case DF_DRAM_ILV_V4D2_CHAN_NPS2_4CH_1K:
2071 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH_1K;
2072 break;
2073 case DF_DRAM_ILV_V4D2_CHAN_NPS1_8S4CH_1K:
2074 if (rule->ddr_sock_ileave_bits == 0) {
2075 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH_1K;
2076 } else {
2077 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH_1K;
2078 }
2079 break;
2080 case DF_DRAM_ILV_V4D2_CHAN_NPS4_3CH_1K:
2081 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_3CH_1K;
2082 break;
2083 case DF_DRAM_ILV_V4D2_CHAN_NPS2_6CH_1K:
2084 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_6CH_1K;
2085 break;
2086 case DF_DRAM_ILV_V4D2_CHAN_NPS1_12CH_1K:
2087 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_12CH_1K;
2088 break;
2089 case DF_DRAM_ILV_V4D2_CHAN_NPS2_5CH_1K:
2090 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_5CH_1K;
2091 break;
2092 case DF_DRAM_ILV_V4D2_CHAN_NPS1_10CH_1K:
2093 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_10CH_1K;
2094 break;
2095 case DF_DRAM_ILV_V4D2_CHAN_MI3H_8CH:
2096 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_MI3H_8CH;
2097 break;
2098 case DF_DRAM_ILV_V4D2_CHAN_MI3H_16CH:
2099 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_MI3H_16CH;
2100 break;
2101 case DF_DRAM_ILV_V4D2_CHAN_MI3H_32CH:
2102 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_MI3H_32CH;
2103 break;
2104 case DF_DRAM_ILV_V4D2_CHAN_NPS4_2CH_2K:
2105 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_2CH_2K;
2106 break;
2107 case DF_DRAM_ILV_V4D2_CHAN_NPS2_4CH_2K:
2108 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH_2K;
2109 break;
2110 case DF_DRAM_ILV_V4D2_CHAN_NPS1_8S4CH_2K:
2111 if (rule->ddr_sock_ileave_bits == 0) {
2112 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH_2K;
2113 } else {
2114 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH_2K;
2115 }
2116 break;
2117 case DF_DRAM_ILV_V4D2_CHAN_NPS1_16S8CH_2K:
2118 if (rule->ddr_sock_ileave_bits == 0) {
2119 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_16CH_2K;
2120 } else {
2121 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH_2K;
2122 }
2123 break;
2124 case DF_DRAM_ILV_V4D2_CHAN_NPS4_3CH_2K:
2125 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_3CH_2K;
2126 break;
2127 case DF_DRAM_ILV_V4D2_CHAN_NPS2_6CH_2K:
2128 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_6CH_2K;
2129 break;
2130 case DF_DRAM_ILV_V4D2_CHAN_NPS1_12CH_2K:
2131 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_12CH_2K;
2132 break;
2133 case DF_DRAM_ILV_V4D2_CHAN_NPS0_24CH_2K:
2134 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS0_24CH_2K;
2135 break;
2136 case DF_DRAM_ILV_V4D2_CHAN_NPS2_5CH_2K:
2137 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_5CH_2K;
2138 break;
2139 case DF_DRAM_ILV_V4D2_CHAN_NPS2_10CH_2K:
2140 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_10CH_2K;
2141 break;
2142 default:
2143 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid channel "
2144 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno,
2145 	    dfno, inst, DF_DRAM_ILV_V4D2_GET_CHAN(ilv));
2146 break;
2147 }
2148
2149 addr_ileave = DF_DRAM_ILV_V4_GET_ADDR(ilv);
2150 switch (addr_ileave) {
2151 case DF_DRAM_ILV_ADDR_8:
2152 case DF_DRAM_ILV_ADDR_9:
2153 case DF_DRAM_ILV_ADDR_10:
2154 case DF_DRAM_ILV_ADDR_11:
2155 case DF_DRAM_ILV_ADDR_12:
2156 break;
2157 default:
2158 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid address "
2159 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno,
2160 dfno, inst, addr_ileave);
2161 return (EINVAL);
2162 }
2163 rule->ddr_addr_start = DF_DRAM_ILV_ADDR_BASE + addr_ileave;
2164
2165 return (0);
2166 }
2167
2168 static int
2169 zen_umc_read_dram_rule(zen_umc_t *umc, const uint_t dfno, const uint_t instid,
2170 const uint_t ruleno, df_dram_rule_t *rule)
2171 {
2172 int ret;
2173
2174 switch (umc->umc_df_rev) {
2175 case DF_REV_2:
2176 case DF_REV_3:
2177 case DF_REV_3P5:
2178 ret = zen_umc_read_dram_rule_df_23(umc, dfno, instid, ruleno,
2179 rule);
2180 break;
2181 case DF_REV_4:
2182 ret = zen_umc_read_dram_rule_df_4(umc, dfno, instid, ruleno,
2183 rule);
2184 break;
2185 case DF_REV_4D2:
2186 ret = zen_umc_read_dram_rule_df_4d2(umc, dfno, instid, ruleno,
2187 rule);
2188 break;
2189 default:
2190 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported "
2191 "DF revision processing DRAM rules: 0x%x", umc->umc_df_rev);
2192 return (-1);
2193 }
2194
2195 if (ret != 0) {
2196 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM "
2197 "rule %u on df/inst 0x%x/0x%x: %d", ruleno,
2198 dfno, instid, ret);
2199 return (-1);
2200 }
2201
2202 return (0);
2203 }
2204
2205 /*
2206 * The Extended remapper has up to 4 remap rule sets. Each set addresses up to
2207 * 16 remap rules (ala DFv4), but the width of the targets is larger so they are
2208 * all split up amongst 3 registers instead. CPUs indicate support for this in
2209 * the DF::DfCapability register. Not all CPUs actually use all such entries. We
2210 * will read all entries, even if they are not in the PPR, with the assumption
2211 * that a CPU DRAM rule will only ever refer to the ones that exist for the
2212 * moment. Our expectation is that these reserved registers are all 0s or all
2213 * 1s, but that has yet to be proven.
2214 */
2215 static int
2216 zen_umc_read_extremap(zen_umc_t *umc, zen_umc_df_t *df, const uint_t instid)
2217 {
2218 const uint_t dfno = df->zud_dfno;
2219 const df_reg_def_t remapA[ZEN_UMC_MAX_CS_REMAPS] = {
2220 DF_CS_REMAP0A_V4D2, DF_CS_REMAP1A_V4D2, DF_CS_REMAP2A_V4D2,
2221 DF_CS_REMAP3A_V4D2 };
2222 const df_reg_def_t remapB[ZEN_UMC_MAX_CS_REMAPS] = {
2223 DF_CS_REMAP0B_V4D2, DF_CS_REMAP1B_V4D2, DF_CS_REMAP2B_V4D2,
2224 DF_CS_REMAP3B_V4D2 };
2225 const df_reg_def_t remapC[ZEN_UMC_MAX_CS_REMAPS] = {
2226 DF_CS_REMAP0C_V4D2, DF_CS_REMAP1C_V4D2, DF_CS_REMAP2C_V4D2,
2227 DF_CS_REMAP3C_V4D2 };
2228
2229 df->zud_cs_nremap = ZEN_UMC_MAX_CS_REMAPS;
2230 for (uint_t i = 0; i < df->zud_cs_nremap; i++) {
2231 int ret;
2232 uint32_t rm[3];
2233 zen_umc_cs_remap_t *remap = &df->zud_remap[i];
2234
2235 if ((ret = amdzen_c_df_read32(dfno, instid, remapA[i],
2236 &rm[0])) != 0) {
2237 dev_err(umc->umc_dip, CE_WARN, "!failed to read "
2238 "df/inst 0x%x/0x%x remap rule %uA: %d", dfno,
2239 instid, i, ret);
2240 return (-1);
2241 }
2242
2243 if ((ret = amdzen_c_df_read32(dfno, instid, remapB[i],
2244 &rm[1])) != 0) {
2245 dev_err(umc->umc_dip, CE_WARN, "!failed to read "
2246 "df/inst 0x%x/0x%x remap rule %uB: %d", dfno,
2247 instid, i, ret);
2248 return (-1);
2249 }
2250
2251 if ((ret = amdzen_c_df_read32(dfno, instid, remapC[i],
2252 &rm[2])) != 0) {
2253 dev_err(umc->umc_dip, CE_WARN, "!failed to read "
2254 "df/inst 0x%x/0x%x remap rule %uC: %d", dfno,
2255 instid, i, ret);
2256 return (-1);
2257 }
2258
2259 /*
2260 * Remap rule A has CS 0-5, B 6-11, C 12-15
2261 */
2262 remap->csr_nremaps = ZEN_UMC_MAX_REMAP_ENTS;
2263 for (uint_t ent = 0; ent < remap->csr_nremaps; ent++) {
2264 uint_t reg = ent / ZEN_UMC_REMAP_PER_REG_4D2;
2265 uint_t idx = ent % ZEN_UMC_REMAP_PER_REG_4D2;
2266 remap->csr_remaps[ent] =
2267 DF_CS_REMAP_GET_CSX_V4B(rm[reg], idx);
2268 }
2269 }
2270 return (0);
2271 }
2272
2273 static int
2274 zen_umc_read_remap(zen_umc_t *umc, zen_umc_df_t *df, const uint_t instid)
2275 {
2276 uint_t nremaps, nents;
2277 const uint_t dfno = df->zud_dfno;
2278 const df_reg_def_t milan_remap0[ZEN_UMC_MILAN_CS_NREMAPS] = {
2279 DF_SKT0_CS_REMAP0_V3, DF_SKT1_CS_REMAP0_V3 };
2280 const df_reg_def_t milan_remap1[ZEN_UMC_MILAN_CS_NREMAPS] = {
2281 DF_SKT0_CS_REMAP1_V3, DF_SKT1_CS_REMAP1_V3 };
2282 const df_reg_def_t dfv4_remapA[ZEN_UMC_MAX_CS_REMAPS] = {
2283 DF_CS_REMAP0A_V4, DF_CS_REMAP1A_V4, DF_CS_REMAP2A_V4,
2284 DF_CS_REMAP3A_V4 };
2285 const df_reg_def_t dfv4_remapB[ZEN_UMC_MAX_CS_REMAPS] = {
2286 DF_CS_REMAP0B_V4, DF_CS_REMAP1B_V4, DF_CS_REMAP2B_V4,
2287 DF_CS_REMAP3B_V4 };
2288 const df_reg_def_t *remapA, *remapB;
2289
2291 switch (umc->umc_df_rev) {
2292 case DF_REV_3:
2293 nremaps = ZEN_UMC_MILAN_CS_NREMAPS;
2294 nents = ZEN_UMC_MILAN_REMAP_ENTS;
2295 remapA = milan_remap0;
2296 remapB = milan_remap1;
2297 break;
2298 case DF_REV_4:
2299 nremaps = ZEN_UMC_MAX_CS_REMAPS;
2300 nents = ZEN_UMC_MAX_REMAP_ENTS;
2301 remapA = dfv4_remapA;
2302 remapB = dfv4_remapB;
2303 break;
2304 case DF_REV_4D2:
2305 return (zen_umc_read_extremap(umc, df, instid));
2306 default:
2307 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported DF "
2308 "revision processing remap rules: 0x%x", umc->umc_df_rev);
2309 return (-1);
2310 }
2311
2312 df->zud_cs_nremap = nremaps;
2313 for (uint_t i = 0; i < nremaps; i++) {
2314 int ret;
2315 uint32_t rm[2];
2316 zen_umc_cs_remap_t *remap = &df->zud_remap[i];
2317
2318 if ((ret = amdzen_c_df_read32(dfno, instid, remapA[i],
2319 &rm[0])) != 0) {
2320 dev_err(umc->umc_dip, CE_WARN, "!failed to read "
2321 "df/inst 0x%x/0x%x remap socket %u-0/A: %d", dfno,
2322 instid, i, ret);
2323 return (-1);
2324 }
2325
2326 if ((ret = amdzen_c_df_read32(dfno, instid, remapB[i],
2327 &rm[1])) != 0) {
2328 dev_err(umc->umc_dip, CE_WARN, "!failed to read "
2329 "df/inst 0x%x/0x%x remap socket %u-1/B: %d", dfno,
2330 instid, i, ret);
2331 return (-1);
2332 }
2333
2334 remap->csr_nremaps = nents;
2335 for (uint_t ent = 0; ent < remap->csr_nremaps; ent++) {
2336 uint_t reg = ent / ZEN_UMC_REMAP_PER_REG;
2337 uint_t idx = ent % ZEN_UMC_REMAP_PER_REG;
2338 remap->csr_remaps[ent] = DF_CS_REMAP_GET_CSX(rm[reg],
2339 idx);
2340 }
2341 }
2342
2343 return (0);
2344 }
2345
2346 /*
2347 * Now that we have a CCM, we have several different tasks ahead of us:
2348 *
2349 * o Determine whether or not the DRAM hole is valid.
2350 * o Snapshot all of the system address rules and translate them into our
2351 * generic format.
2352 * o Determine if there are any rules to retarget things (currently
2353 * Milan/Genoa).
2354 * o Determine if there are any other hashing rules enabled.
2355 *
2356 * We only require this from a single CCM as these are currently required to be
2357 * the same across all of them.
2358 */
2359 static int
2360 zen_umc_fill_ccm_cb(const uint_t dfno, const uint32_t fabid,
2361 const uint32_t instid, void *arg)
2362 {
2363 zen_umc_t *umc = arg;
2364 zen_umc_df_t *df = &umc->umc_dfs[dfno];
2365 df_reg_def_t hole;
2366 int ret;
2367 uint32_t val;
2368
2369 df->zud_dfno = dfno;
2370 df->zud_ccm_inst = instid;
2371
2372 /*
2373 * Read the DF::DfCapability register. This is not instance specific.
2374 */
2375 if ((ret = amdzen_c_df_read32_bcast(dfno, DF_CAPAB, &df->zud_capab)) !=
2376 0) {
2377 dev_err(umc->umc_dip, CE_WARN, "!failed to read DF Capability "
2378 "register: %d", ret);
2379 return (-1);
2380 }
2381
2382 /*
2383 * Next get the DRAM hole. This has the same layout, albeit different
2384 * registers across our different platforms.
2385 */
2386 switch (umc->umc_df_rev) {
2387 case DF_REV_2:
2388 case DF_REV_3:
2389 case DF_REV_3P5:
2390 hole = DF_DRAM_HOLE_V2;
2391 break;
2392 case DF_REV_4:
2393 case DF_REV_4D2:
2394 hole = DF_DRAM_HOLE_V4;
2395 break;
2396 default:
2397 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported "
2398 "DF version: 0x%x", umc->umc_df_rev);
2399 return (-1);
2400 }
2401
2402 if ((ret = amdzen_c_df_read32(dfno, instid, hole, &val)) != 0) {
2403 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM Hole: %d",
2404 ret);
2405 return (-1);
2406 }
2407
2408 df->zud_hole_raw = val;
2409 if (DF_DRAM_HOLE_GET_VALID(val)) {
2410 uint64_t t;
2411
2412 df->zud_flags |= ZEN_UMC_DF_F_HOLE_VALID;
2413 t = DF_DRAM_HOLE_GET_BASE(val);
2414 df->zud_hole_base = t << DF_DRAM_HOLE_BASE_SHIFT;
2415 }
2416
2417 /*
2418 * Prior to Zen 4, the hash information was global and applied to all
2419 * COD rules globally. Check if we're on such a system and snapshot this
2420 * so we can use it during the rule application. Note, this was added in
2421 * DFv3.
2422 */
2423 if (umc->umc_df_rev == DF_REV_3 || umc->umc_df_rev == DF_REV_3P5) {
2424 uint32_t globctl;
2425
2426 if ((ret = amdzen_c_df_read32(dfno, instid, DF_GLOB_CTL_V3,
2427 &globctl)) != 0) {
2428 dev_err(umc->umc_dip, CE_WARN, "!failed to read global "
2429 "control: %d", ret);
2430 return (-1);
2431 }
2432
2433 df->zud_glob_ctl_raw = globctl;
2434 if (DF_GLOB_CTL_V3_GET_HASH_1G(globctl) != 0) {
2435 df->zud_flags |= ZEN_UMC_DF_F_HASH_30_32;
2436 }
2437
2438 if (DF_GLOB_CTL_V3_GET_HASH_2M(globctl) != 0) {
2439 df->zud_flags |= ZEN_UMC_DF_F_HASH_21_23;
2440 }
2441
2442 if (DF_GLOB_CTL_V3_GET_HASH_64K(globctl) != 0) {
2443 df->zud_flags |= ZEN_UMC_DF_F_HASH_16_18;
2444 }
2445 }
2446
2447 df->zud_dram_nrules = umc->umc_fdata->zufd_dram_nrules;
2448 for (uint_t i = 0; i < umc->umc_fdata->zufd_dram_nrules; i++) {
2449 if (zen_umc_read_dram_rule(umc, dfno, instid, i,
2450 &df->zud_rules[i]) != 0) {
2451 return (-1);
2452 }
2453 }
2454
2455 /*
2456 * Once AMD got past DF v4.0 there was a feature bit that indicates
2457 * support for the remapping engine in the DF_CAPAB (DF::DfCapability)
2458 * register. Prior to that we must use our table.
2459 */
2460 if ((umc->umc_df_rev >= DF_REV_4D2 &&
2461 DF_CAPAB_GET_EXTCSREMAP(df->zud_capab) != 0) ||
2462 (umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_TARG_REMAP) != 0) {
2463 if (zen_umc_read_remap(umc, df, instid) != 0) {
2464 return (-1);
2465 }
2466 }
2467
2468 /*
2469 * We only want a single entry, so always return 1 to terminate us
2470 * early.
2471 */
2472 return (1);
2473 }
2474
2475 /*
2476 * At this point we can go through and calculate the size of the DIMM that we've
2477 * found. While it would be nice to determine this from the SPD data, we can
2478 * figure this out entirely based upon the information in the memory controller.
2479 *
2480 * This works by first noting that DDR4, LPDDR4, DDR5, and LPDDR5 are all built
2481 * around 64-bit data channels. This means that each row and column provides up
2482 * to 64-bits (ignoring ECC) of data. There are a number of banks and bank groups.
2483 * The memory controller tracks the total number of bits that are used for each.
2484 * While DDR5 introduces sub-channels, we don't need to worry about those here,
2485 * because ultimately the sub-channel just splits the 64-bit bus we're assuming
2486 * into 2x 32-bit buses. While they can be independently selected, they should
2487 * have equivalent capacities.
2488 *
2489 * The most confusing part of this is that there is one of these related to each
2490 * rank on the device. The UMC natively has two 'chip-selects', each of which is
2491 * used to correspond to a rank. There are then separately multiple rm bits in
2492 * each chip-select. As far as we can tell the PSP or SMU programs the number of
2493 * rm bits to be zero when you have a dual-rank device.
2494 *
2495 * We end up summing each chip-select rather than assuming that the chip-selects
2496 * are identical. In theory some amount of asymmetric DIMMs exist in the wild,
2497 * but we don't know of many systems using them.
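 *
 * As a worked example (values chosen purely for illustration): a chip-select
 * with 17 row bits, 10 column bits, 5 total bank bits, and 0 rm bits comes to
 * (8 << 27) * (1 << 5) * (1 << 0) bytes, i.e. 32 GiB for that chip-select.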
2498 */
2499 static void
2500 zen_umc_calc_dimm_size(umc_dimm_t *dimm)
2501 {
2502 dimm->ud_dimm_size = 0;
2503 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_BASE; i++) {
2504 uint64_t nrc;
2505 const umc_cs_t *cs = &dimm->ud_cs[i];
2506
2507 if ((cs->ucs_flags & UMC_CS_F_DECODE_EN) == 0) {
2508 continue;
2509 }
2510
2511 nrc = cs->ucs_nrow_lo + cs->ucs_nrow_hi + cs->ucs_ncol;
2512 dimm->ud_dimm_size += (8ULL << nrc) * (1 << cs->ucs_nbanks) *
2513 (1 << cs->ucs_nrm);
2514 }
2515 }
2516
2517 /*
2518 * This is used to fill in the common properties about a DIMM. This should occur
2519 * after the rank information has been filled out. The information used is the
2520 * same between DDR4 and DDR5 DIMMs. The only major difference is the register
2521 * offset.
2522 */
2523 static boolean_t
2524 zen_umc_fill_dimm_common(zen_umc_t *umc, zen_umc_df_t *df, zen_umc_chan_t *chan,
2525 const uint_t dimmno, boolean_t ddr4_style)
2526 {
2527 umc_dimm_t *dimm;
2528 int ret;
2529 smn_reg_t reg;
2530 uint32_t val;
2531 const uint32_t id = chan->chan_logid;
2532
2533 dimm = &chan->chan_dimms[dimmno];
2534 dimm->ud_dimmno = dimmno;
2535
2536 if (ddr4_style) {
2537 reg = UMC_DIMMCFG_DDR4(id, dimmno);
2538 } else {
2539 reg = UMC_DIMMCFG_DDR5(id, dimmno);
2540 }
2541 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2542 dev_err(umc->umc_dip, CE_WARN, "failed to read DIMM "
2543 "configuration register %x: %d", SMN_REG_ADDR(reg), ret);
2544 return (B_FALSE);
2545 }
2546 dimm->ud_dimmcfg_raw = val;
2547
2548 if (UMC_DIMMCFG_GET_X16(val) != 0) {
2549 dimm->ud_width = UMC_DIMM_W_X16;
2550 } else if (UMC_DIMMCFG_GET_X4(val) != 0) {
2551 dimm->ud_width = UMC_DIMM_W_X4;
2552 } else {
2553 dimm->ud_width = UMC_DIMM_W_X8;
2554 }
2555
2556 if (UMC_DIMMCFG_GET_3DS(val) != 0) {
2557 dimm->ud_kind = UMC_DIMM_K_3DS_RDIMM;
2558 } else if (UMC_DIMMCFG_GET_LRDIMM(val) != 0) {
2559 dimm->ud_kind = UMC_DIMM_K_LRDIMM;
2560 } else if (UMC_DIMMCFG_GET_RDIMM(val) != 0) {
2561 dimm->ud_kind = UMC_DIMM_K_RDIMM;
2562 } else {
2563 dimm->ud_kind = UMC_DIMM_K_UDIMM;
2564 }
2565
2566 /*
2567 * DIMM information in a UMC can be somewhat confusing. There are quite
2568 * a number of non-zero reset values here. Flag whether or not
2569 * we think this entry should be usable based on enabled chip-selects.
2570 */
2571 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_BASE; i++) {
2572 if ((dimm->ud_cs[i].ucs_flags & UMC_CS_F_DECODE_EN) != 0) {
2573 dimm->ud_flags |= UMC_DIMM_F_VALID;
2574 break;
2575 }
2576 }
2577
2578 /*
2579 * The remaining calculations we only want to perform if we have actual
2580 * data for a DIMM.
2581 */
2582 if ((dimm->ud_flags & UMC_DIMM_F_VALID) == 0) {
2583 return (B_TRUE);
2584 }
2585
2586 zen_umc_calc_dimm_size(dimm);
2587
2588 return (B_TRUE);
2589 }
2590
2591 /*
2592 * Fill all the information about a DDR4 DIMM. In the DDR4 UMC, some of this
2593 * information is on a per-chip select basis while at other times it is on a
2594 * per-DIMM basis. In general, chip-selects 0/1 correspond to DIMM 0, and
2595 * chip-selects 2/3 correspond to DIMM 1. To normalize things with the DDR5 UMC
2596 * which generally has things stored on a per-rank/chip-select basis, we
2597 * duplicate information that is DIMM-wide into the chip-select data structure
2598 * (umc_cs_t).
2599 */
2600 static boolean_t
2601 zen_umc_fill_chan_dimm_ddr4(zen_umc_t *umc, zen_umc_df_t *df,
2602 zen_umc_chan_t *chan, const uint_t dimmno)
2603 {
2604 umc_dimm_t *dimm;
2605 umc_cs_t *cs0, *cs1;
2606 const uint32_t id = chan->chan_logid;
2607 int ret;
2608 uint32_t val;
2609 smn_reg_t reg;
2610
2611 ASSERT3U(dimmno, <, ZEN_UMC_MAX_DIMMS);
2612 dimm = &chan->chan_dimms[dimmno];
2613 cs0 = &dimm->ud_cs[0];
2614 cs1 = &dimm->ud_cs[1];
2615
2616 /*
2617 * DDR4 organization has initial data that exists on a per-chip-select
2618 * basis. The rest of it is on a per-DIMM basis. First we grab the
2619 * per-chip-select data. After this for loop, we will always duplicate
2620 * all data that we gather into both chip-selects.
2621 */
2622 for (uint_t i = 0; i < ZEN_UMC_MAX_CS_PER_DIMM; i++) {
2623 uint64_t addr;
2624 const uint16_t reginst = i + dimmno * 2;
2625 reg = UMC_BASE(id, reginst);
2626 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2627 dev_err(umc->umc_dip, CE_WARN, "failed to read base "
2628 "register %x: %d", SMN_REG_ADDR(reg), ret);
2629 return (B_FALSE);
2630 }
2631
2632 addr = (uint64_t)UMC_BASE_GET_ADDR(val) << UMC_BASE_ADDR_SHIFT;
2633 dimm->ud_cs[i].ucs_base.udb_base = addr;
2634 dimm->ud_cs[i].ucs_base.udb_valid = UMC_BASE_GET_EN(val);
2635
2636 reg = UMC_BASE_SEC(id, reginst);
2637 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2638 dev_err(umc->umc_dip, CE_WARN, "failed to read "
2639 "secondary base register %x: %d", SMN_REG_ADDR(reg),
2640 ret);
2641 return (B_FALSE);
2642 }
2643
2644 addr = (uint64_t)UMC_BASE_GET_ADDR(val) << UMC_BASE_ADDR_SHIFT;
2645 dimm->ud_cs[i].ucs_sec.udb_base = addr;
2646 dimm->ud_cs[i].ucs_sec.udb_valid = UMC_BASE_GET_EN(val);
2647
2648 if (dimm->ud_cs[i].ucs_base.udb_valid ||
2649 dimm->ud_cs[i].ucs_sec.udb_valid) {
2650 dimm->ud_cs[i].ucs_flags |= UMC_CS_F_DECODE_EN;
2651 }
2652 }
2653
2654 reg = UMC_MASK_DDR4(id, dimmno);
2655 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2656 dev_err(umc->umc_dip, CE_WARN, "failed to read mask register "
2657 "%x: %d", SMN_REG_ADDR(reg), ret);
2658 return (B_FALSE);
2659 }
2660
2661 /*
2662 * When we extract the masks, hardware only checks a limited range of
2663 * bits. Therefore we need to always OR in those lower order bits.
2664 */
2665 cs0->ucs_base_mask = (uint64_t)UMC_MASK_GET_ADDR(val) <<
2666 UMC_MASK_ADDR_SHIFT;
2667 cs0->ucs_base_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1;
2668 cs1->ucs_base_mask = cs0->ucs_base_mask;
2669
2670 reg = UMC_MASK_SEC_DDR4(id, dimmno);
2671 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2672 dev_err(umc->umc_dip, CE_WARN, "failed to read secondary mask "
2673 "register %x: %d", SMN_REG_ADDR(reg), ret);
2674 return (B_FALSE);
2675 }
2676 cs0->ucs_sec_mask = (uint64_t)UMC_MASK_GET_ADDR(val) <<
2677 UMC_MASK_ADDR_SHIFT;
2678 cs0->ucs_sec_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1;
2679 cs1->ucs_sec_mask = cs0->ucs_sec_mask;
2680
2681 reg = UMC_ADDRCFG_DDR4(id, dimmno);
2682 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2683 dev_err(umc->umc_dip, CE_WARN, "failed to read address config "
2684 "register %x: %d", SMN_REG_ADDR(reg), ret);
2685 return (B_FALSE);
2686 }
2687
2688 cs0->ucs_nbanks = UMC_ADDRCFG_GET_NBANK_BITS(val) +
2689 UMC_ADDRCFG_NBANK_BITS_BASE;
2690 cs1->ucs_nbanks = cs0->ucs_nbanks;
2691 cs0->ucs_ncol = UMC_ADDRCFG_GET_NCOL_BITS(val) +
2692 UMC_ADDRCFG_NCOL_BITS_BASE;
2693 cs1->ucs_ncol = cs0->ucs_ncol;
2694 cs0->ucs_nrow_hi = UMC_ADDRCFG_DDR4_GET_NROW_BITS_HI(val);
2695 cs1->ucs_nrow_hi = cs0->ucs_nrow_hi;
2696 cs0->ucs_nrow_lo = UMC_ADDRCFG_GET_NROW_BITS_LO(val) +
2697 UMC_ADDRCFG_NROW_BITS_LO_BASE;
2698 cs1->ucs_nrow_lo = cs0->ucs_nrow_lo;
2699 cs0->ucs_nbank_groups = UMC_ADDRCFG_GET_NBANKGRP_BITS(val);
2700 cs1->ucs_nbank_groups = cs0->ucs_nbank_groups;
2701 /*
2702 * As the chip-select XORs don't always show up, use a dummy value
2703 * that'll result in no change occurring here.
2704 */
2705 cs0->ucs_cs_xor = cs1->ucs_cs_xor = 0;
2706
2707 /*
2708 * APUs don't seem to support various rank select bits.
2709 */
2710 if (umc->umc_fdata->zufd_umc_style == ZEN_UMC_UMC_S_DDR4) {
2711 cs0->ucs_nrm = UMC_ADDRCFG_DDR4_GET_NRM_BITS(val);
2712 cs1->ucs_nrm = cs0->ucs_nrm;
2713 } else {
2714 cs0->ucs_nrm = cs1->ucs_nrm = 0;
2715 }
2716
2717 reg = UMC_ADDRSEL_DDR4(id, dimmno);
2718 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2719 dev_err(umc->umc_dip, CE_WARN, "failed to read bank address "
2720 "select register %x: %d", SMN_REG_ADDR(reg), ret);
2721 return (B_FALSE);
2722 }
2723 cs0->ucs_row_hi_bit = UMC_ADDRSEL_DDR4_GET_ROW_HI(val) +
2724 UMC_ADDRSEL_DDR4_ROW_HI_BASE;
2725 cs1->ucs_row_hi_bit = cs0->ucs_row_hi_bit;
2726 cs0->ucs_row_low_bit = UMC_ADDRSEL_GET_ROW_LO(val) +
2727 UMC_ADDRSEL_ROW_LO_BASE;
2728 cs1->ucs_row_low_bit = cs0->ucs_row_low_bit;
2729 cs0->ucs_bank_bits[0] = UMC_ADDRSEL_GET_BANK0(val) +
2730 UMC_ADDRSEL_BANK_BASE;
2731 cs0->ucs_bank_bits[1] = UMC_ADDRSEL_GET_BANK1(val) +
2732 UMC_ADDRSEL_BANK_BASE;
2733 cs0->ucs_bank_bits[2] = UMC_ADDRSEL_GET_BANK2(val) +
2734 UMC_ADDRSEL_BANK_BASE;
2735 cs0->ucs_bank_bits[3] = UMC_ADDRSEL_GET_BANK3(val) +
2736 UMC_ADDRSEL_BANK_BASE;
2737 cs0->ucs_bank_bits[4] = UMC_ADDRSEL_GET_BANK4(val) +
2738 UMC_ADDRSEL_BANK_BASE;
2739 bcopy(cs0->ucs_bank_bits, cs1->ucs_bank_bits,
2740 sizeof (cs0->ucs_bank_bits));
2741
2742 reg = UMC_COLSEL_LO_DDR4(id, dimmno);
2743 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2744 dev_err(umc->umc_dip, CE_WARN, "failed to read column address "
2745 "select low register %x: %d", SMN_REG_ADDR(reg), ret);
2746 return (B_FALSE);
2747 }
2748 for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) {
2749 cs0->ucs_col_bits[i] = UMC_COLSEL_REMAP_GET_COL(val, i) +
2750 UMC_COLSEL_LO_BASE;
2751 }
2752
2753 reg = UMC_COLSEL_HI_DDR4(id, dimmno);
2754 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2755 dev_err(umc->umc_dip, CE_WARN, "failed to read column address "
2756 "select high register %x: %d", SMN_REG_ADDR(reg), ret);
2757 return (B_FALSE);
2758 }
2759 for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) {
2760 cs0->ucs_col_bits[i + ZEN_UMC_MAX_COLSEL_PER_REG] =
2761 UMC_COLSEL_REMAP_GET_COL(val, i) + UMC_COLSEL_HI_BASE;
2762 }
2763 bcopy(cs0->ucs_col_bits, cs1->ucs_col_bits, sizeof (cs0->ucs_col_bits));
2764
2765 /*
2766 * The next two registers give us information about a given rank select.
2767 * In the APUs, the inversion bits are there; however, the actual bit
2768 * selects are not. In this case we read the reserved bits regardless.
2769 * They should be ignored because the number of rank-multiplication
2770 * bits is zero.
2771 */
2772 reg = UMC_RMSEL_DDR4(id, dimmno);
2773 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2774 dev_err(umc->umc_dip, CE_WARN, "failed to read rank address "
2775 "select register %x: %d", SMN_REG_ADDR(reg), ret);
2776 return (B_FALSE);
2777 }
2778 cs0->ucs_inv_msbs = UMC_RMSEL_DDR4_GET_INV_MSBE(val);
2779 cs1->ucs_inv_msbs = UMC_RMSEL_DDR4_GET_INV_MSBO(val);
2780 cs0->ucs_rm_bits[0] = UMC_RMSEL_DDR4_GET_RM0(val) +
2781 UMC_RMSEL_BASE;
2782 cs0->ucs_rm_bits[1] = UMC_RMSEL_DDR4_GET_RM1(val) +
2783 UMC_RMSEL_BASE;
2784 cs0->ucs_rm_bits[2] = UMC_RMSEL_DDR4_GET_RM2(val) +
2785 UMC_RMSEL_BASE;
2786 bcopy(cs0->ucs_rm_bits, cs1->ucs_rm_bits, sizeof (cs0->ucs_rm_bits));
2787
2788 reg = UMC_RMSEL_SEC_DDR4(id, dimmno);
2789 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2790 dev_err(umc->umc_dip, CE_WARN, "failed to read secondary rank "
2791 "address select register %x: %d", SMN_REG_ADDR(reg), ret);
2792 return (B_FALSE);
2793 }
2794 cs0->ucs_inv_msbs_sec = UMC_RMSEL_DDR4_GET_INV_MSBE(val);
2795 cs1->ucs_inv_msbs_sec = UMC_RMSEL_DDR4_GET_INV_MSBO(val);
2796 cs0->ucs_rm_bits_sec[0] = UMC_RMSEL_DDR4_GET_RM0(val) +
2797 UMC_RMSEL_BASE;
2798 cs0->ucs_rm_bits_sec[1] = UMC_RMSEL_DDR4_GET_RM1(val) +
2799 UMC_RMSEL_BASE;
2800 cs0->ucs_rm_bits_sec[2] = UMC_RMSEL_DDR4_GET_RM2(val) +
2801 UMC_RMSEL_BASE;
2802 bcopy(cs0->ucs_rm_bits_sec, cs1->ucs_rm_bits_sec,
2803 sizeof (cs0->ucs_rm_bits_sec));
2804
2805 return (zen_umc_fill_dimm_common(umc, df, chan, dimmno, B_TRUE));
2806 }
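
/*
 * For illustration, the base/mask pairs gathered above are used by the
 * decoder to match a normalized channel address against a chip-select.
 * A simplified sketch of that check (the function name is hypothetical;
 * the decode source has the authoritative logic) looks like:
 *
 *	boolean_t
 *	cs_matches(uint64_t addr, uint64_t base, uint64_t mask)
 *	{
 *		return ((addr & ~mask) == (base & ~mask));
 *	}
 *
 * This is also why we OR the unchecked low-order bits into the mask:
 * bits that hardware never compares must not cause a match to fail.
 */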
2807
2808 /*
2809 * The DDR5 based systems are organized such that almost all the information we
2810 * care about is split between two different chip-select structures in the UMC
2811 * hardware SMN space.
2812 */
2813 static boolean_t
2814 zen_umc_fill_chan_rank_ddr5(zen_umc_t *umc, zen_umc_df_t *df,
2815 zen_umc_chan_t *chan, const uint_t dimmno, const uint_t rankno)
2816 {
2817 int ret;
2818 umc_cs_t *cs;
2819 uint32_t val;
2820 smn_reg_t reg;
2821 const uint32_t id = chan->chan_logid;
2822 const uint32_t regno = dimmno * 2 + rankno;
2823
2824 ASSERT3U(dimmno, <, ZEN_UMC_MAX_DIMMS);
2825 ASSERT3U(rankno, <, ZEN_UMC_MAX_CS_PER_DIMM);
2826 cs = &chan->chan_dimms[dimmno].ud_cs[rankno];
2827
2828 reg = UMC_BASE(id, regno);
2829 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2830 dev_err(umc->umc_dip, CE_WARN, "failed to read base "
2831 "register %x: %d", SMN_REG_ADDR(reg), ret);
2832 return (B_FALSE);
2833 }
2834 cs->ucs_base.udb_base = (uint64_t)UMC_BASE_GET_ADDR(val) <<
2835 UMC_BASE_ADDR_SHIFT;
2836 cs->ucs_base.udb_valid = UMC_BASE_GET_EN(val);
2837 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) {
2838 uint64_t addr;
2839
2840 reg = UMC_BASE_EXT_DDR5(id, regno);
2841 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) !=
2842 0) {
2843 dev_err(umc->umc_dip, CE_WARN, "failed to read "
2844 "extended base register %x: %d", SMN_REG_ADDR(reg),
2845 ret);
2846 return (B_FALSE);
2847 }
2848
2849 addr = (uint64_t)UMC_BASE_EXT_GET_ADDR(val) <<
2850 UMC_BASE_EXT_ADDR_SHIFT;
2851 cs->ucs_base.udb_base |= addr;
2852 }
2853
2854 reg = UMC_BASE_SEC(id, regno);
2855 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2856 dev_err(umc->umc_dip, CE_WARN, "failed to read secondary base "
2857 "register %x: %d", SMN_REG_ADDR(reg), ret);
2858 return (B_FALSE);
2859 }
2860 cs->ucs_sec.udb_base = (uint64_t)UMC_BASE_GET_ADDR(val) <<
2861 UMC_BASE_ADDR_SHIFT;
2862 cs->ucs_sec.udb_valid = UMC_BASE_GET_EN(val);
2863 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) {
2864 uint64_t addr;
2865
2866 reg = UMC_BASE_EXT_SEC_DDR5(id, regno);
2867 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) !=
2868 0) {
2869 dev_err(umc->umc_dip, CE_WARN, "failed to read "
2870 "extended secondary base register %x: %d",
2871 SMN_REG_ADDR(reg), ret);
2872 return (B_FALSE);
2873 }
2874
2875 addr = (uint64_t)UMC_BASE_EXT_GET_ADDR(val) <<
2876 UMC_BASE_EXT_ADDR_SHIFT;
2877 cs->ucs_sec.udb_base |= addr;
2878 }
2879
2880 if (cs->ucs_base.udb_valid || cs->ucs_sec.udb_valid) {
2881 cs->ucs_flags |= UMC_CS_F_DECODE_EN;
2882 }
2883
2884 reg = UMC_MASK_DDR5(id, regno);
2885 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2886 dev_err(umc->umc_dip, CE_WARN, "failed to read mask "
2887 "register %x: %d", SMN_REG_ADDR(reg), ret);
2888 return (B_FALSE);
2889 }
2890 cs->ucs_base_mask = (uint64_t)UMC_MASK_GET_ADDR(val) <<
2891 UMC_MASK_ADDR_SHIFT;
2892 cs->ucs_base_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1;
2893 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) {
2894 uint64_t addr;
2895
2896 reg = UMC_MASK_EXT_DDR5(id, regno);
2897 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) !=
2898 0) {
2899 dev_err(umc->umc_dip, CE_WARN, "failed to read "
2900 "extended mask register %x: %d", SMN_REG_ADDR(reg),
2901 ret);
2902 return (B_FALSE);
2903 }
2904
2905 addr = (uint64_t)UMC_MASK_EXT_GET_ADDR(val) <<
2906 UMC_MASK_EXT_ADDR_SHIFT;
2907 cs->ucs_base_mask |= addr;
2908 }
2909
2910
2911 reg = UMC_MASK_SEC_DDR5(id, regno);
2912 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2913 dev_err(umc->umc_dip, CE_WARN, "failed to read secondary mask "
2914 "register %x: %d", SMN_REG_ADDR(reg), ret);
2915 return (B_FALSE);
2916 }
2917 cs->ucs_sec_mask = (uint64_t)UMC_MASK_GET_ADDR(val) <<
2918 UMC_MASK_ADDR_SHIFT;
2919 cs->ucs_sec_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1;
2920 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) {
2921 uint64_t addr;
2922
2923 reg = UMC_MASK_EXT_SEC_DDR5(id, regno);
2924 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) !=
2925 0) {
2926 dev_err(umc->umc_dip, CE_WARN, "failed to read "
2927 "extended mask register %x: %d", SMN_REG_ADDR(reg),
2928 ret);
2929 return (B_FALSE);
2930 }
2931
2932 addr = (uint64_t)UMC_MASK_EXT_GET_ADDR(val) <<
2933 UMC_MASK_EXT_ADDR_SHIFT;
2934 cs->ucs_sec_mask |= addr;
2935 }
2936
2937 reg = UMC_ADDRCFG_DDR5(id, regno);
2938 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2939 dev_err(umc->umc_dip, CE_WARN, "failed to read address config "
2940 "register %x: %d", SMN_REG_ADDR(reg), ret);
2941 return (B_FALSE);
2942 }
2943 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_CS_XOR) != 0) {
2944 cs->ucs_cs_xor = UMC_ADDRCFG_DDR5_GET_CSXOR(val);
2945 } else {
2946 cs->ucs_cs_xor = 0;
2947 }
2948 cs->ucs_nbanks = UMC_ADDRCFG_GET_NBANK_BITS(val) +
2949 UMC_ADDRCFG_NBANK_BITS_BASE;
2950 cs->ucs_ncol = UMC_ADDRCFG_GET_NCOL_BITS(val) +
2951 UMC_ADDRCFG_NCOL_BITS_BASE;
2952 cs->ucs_nrow_lo = UMC_ADDRCFG_GET_NROW_BITS_LO(val) +
2953 UMC_ADDRCFG_NROW_BITS_LO_BASE;
2954 cs->ucs_nrow_hi = 0;
2955 cs->ucs_nrm = UMC_ADDRCFG_DDR5_GET_NRM_BITS(val);
2956 cs->ucs_nbank_groups = UMC_ADDRCFG_GET_NBANKGRP_BITS(val);
2957
2958 reg = UMC_ADDRSEL_DDR5(id, regno);
2959 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2960 dev_err(umc->umc_dip, CE_WARN, "failed to read address select "
2961 "register %x: %d", SMN_REG_ADDR(reg), ret);
2962 return (B_FALSE);
2963 }
2964 cs->ucs_row_hi_bit = 0;
2965 cs->ucs_row_low_bit = UMC_ADDRSEL_GET_ROW_LO(val) +
2966 UMC_ADDRSEL_ROW_LO_BASE;
2967 cs->ucs_bank_bits[4] = UMC_ADDRSEL_GET_BANK4(val) +
2968 UMC_ADDRSEL_BANK_BASE;
2969 cs->ucs_bank_bits[3] = UMC_ADDRSEL_GET_BANK3(val) +
2970 UMC_ADDRSEL_BANK_BASE;
2971 cs->ucs_bank_bits[2] = UMC_ADDRSEL_GET_BANK2(val) +
2972 UMC_ADDRSEL_BANK_BASE;
2973 cs->ucs_bank_bits[1] = UMC_ADDRSEL_GET_BANK1(val) +
2974 UMC_ADDRSEL_BANK_BASE;
2975 cs->ucs_bank_bits[0] = UMC_ADDRSEL_GET_BANK0(val) +
2976 UMC_ADDRSEL_BANK_BASE;
2977
2978 reg = UMC_COLSEL_LO_DDR5(id, regno);
2979 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2980 dev_err(umc->umc_dip, CE_WARN, "failed to read column address "
2981 "select low register %x: %d", SMN_REG_ADDR(reg), ret);
2982 return (B_FALSE);
2983 }
2984 for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) {
2985 cs->ucs_col_bits[i] = UMC_COLSEL_REMAP_GET_COL(val, i) +
2986 UMC_COLSEL_LO_BASE;
2987 }
2988
2989 reg = UMC_COLSEL_HI_DDR5(id, regno);
2990 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2991 dev_err(umc->umc_dip, CE_WARN, "failed to read column address "
2992 "select high register %x: %d", SMN_REG_ADDR(reg), ret);
2993 return (B_FALSE);
2994 }
2995 for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) {
2996 cs->ucs_col_bits[i + ZEN_UMC_MAX_COLSEL_PER_REG] =
2997 UMC_COLSEL_REMAP_GET_COL(val, i) + UMC_COLSEL_HI_BASE;
2998 }
2999
3000 /*
3001 * Time for our friend, the RM Selection register. Like in DDR4 we end
3002 * up reading everything here, even though most others have reserved
3003 * bits here. The intent is that we won't look at the reserved bits
3004 * unless something actually points us there.
3005 */
3006 reg = UMC_RMSEL_DDR5(id, regno);
3007 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3008 dev_err(umc->umc_dip, CE_WARN, "failed to read rank multiply "
3009 "select register %x: %d", SMN_REG_ADDR(reg), ret);
3010 return (B_FALSE);
3011 }
3012
3013 /*
3014 * DDR5-based devices have primary and secondary inversion msbs; however,
3015 * they only have a single set of rm bits. To normalize things with the
3016 * DDR4 subsystem, we copy the primary bits to the secondary so we can use
3017 * them the same way in the decoder/encoder.
3018 */
3019 cs->ucs_inv_msbs = UMC_RMSEL_DDR5_GET_INV_MSBS(val);
3020 cs->ucs_inv_msbs_sec = UMC_RMSEL_DDR5_GET_INV_MSBS_SEC(val);
3021 cs->ucs_subchan = UMC_RMSEL_DDR5_GET_SUBCHAN(val) +
3022 UMC_RMSEL_DDR5_SUBCHAN_BASE;
3023 cs->ucs_rm_bits[3] = UMC_RMSEL_DDR5_GET_RM3(val) + UMC_RMSEL_BASE;
3024 cs->ucs_rm_bits[2] = UMC_RMSEL_DDR5_GET_RM2(val) + UMC_RMSEL_BASE;
3025 cs->ucs_rm_bits[1] = UMC_RMSEL_DDR5_GET_RM1(val) + UMC_RMSEL_BASE;
3026 cs->ucs_rm_bits[0] = UMC_RMSEL_DDR5_GET_RM0(val) + UMC_RMSEL_BASE;
3027 bcopy(cs->ucs_rm_bits, cs->ucs_rm_bits_sec,
3028 sizeof (cs->ucs_rm_bits));
3029
3030 return (zen_umc_fill_dimm_common(umc, df, chan, dimmno, B_FALSE));
3031 }
3032
3033 static void
3034 zen_umc_fill_ddr_type(zen_umc_t *umc, zen_umc_chan_t *chan)
3035 {
3036 umc_dimm_type_t dimm = UMC_DIMM_T_UNKNOWN;
3037 uint8_t val;
3038
3039 /*
3040 * The different UMC styles split into two groups: those that support
3041 * DDR4 and those that support DDR5 (with the hybrid group being in the
3042 * DDR5 style camp). While all the values are consistent across these
3043 * (e.g. reserved values correspond to unsupported items), we still
3044 * check types based on the UMC's design type so that if we see
3045 * something weird, we don't accidentally use an older value.
3046 */
3047 val = UMC_UMCCFG_GET_DDR_TYPE(chan->chan_umccfg_raw);
3048 switch (umc->umc_fdata->zufd_umc_style) {
3049 case ZEN_UMC_UMC_S_DDR4:
3050 case ZEN_UMC_UMC_S_DDR4_APU:
3051 switch (val) {
3052 case UMC_UMCCFG_DDR4_T_DDR4:
3053 dimm = UMC_DIMM_T_DDR4;
3054 break;
3055 case UMC_UMCCFG_DDR4_T_LPDDR4:
3056 dimm = UMC_DIMM_T_LPDDR4;
3057 break;
3058 default:
3059 break;
3060 }
3061 break;
3062 case ZEN_UMC_UMC_S_HYBRID_LPDDR5:
3063 switch (val) {
3064 case UMC_UMCCFG_DDR5_T_LPDDR5:
3065 dimm = UMC_DIMM_T_LPDDR5;
3066 break;
3067 case UMC_UMCCFG_DDR5_T_LPDDR4:
3068 dimm = UMC_DIMM_T_LPDDR4;
3069 break;
3070 default:
3071 break;
3072 }
3073 break;
3074 case ZEN_UMC_UMC_S_DDR5:
3075 case ZEN_UMC_UMC_S_DDR5_APU:
3076 switch (val) {
3077 case UMC_UMCCFG_DDR5_T_DDR5:
3078 dimm = UMC_DIMM_T_DDR5;
3079 break;
3080 case UMC_UMCCFG_DDR5_T_LPDDR5:
3081 dimm = UMC_DIMM_T_LPDDR5;
3082 break;
3083 default:
3084 break;
3085 }
3086 break;
3087 }
3088
3089 chan->chan_type = dimm;
3090 }
3091
3092 /*
3093 * Use the DDR4 frequency table to determine the channel's clock and speed.
3094 * Note that our hybrid-based UMCs use 8 bits for the clock, while traditional
3095 * DDR4 ones use only 7. The caller is responsible for using the right mask.
3096 */
3097 static void
3098 zen_umc_fill_chan_ddr4(zen_umc_chan_t *chan, uint_t mstate,
3099 const uint32_t clock)
3100 {
3101 for (size_t i = 0; i < ARRAY_SIZE(zen_umc_ddr4_map); i++) {
3102 if (clock == zen_umc_ddr4_map[i].zufm_reg) {
3103 chan->chan_clock[mstate] = zen_umc_ddr4_map[i].zufm_mhz;
3104 chan->chan_speed[mstate] =
3105 zen_umc_ddr4_map[i].zufm_mts2;
3106 break;
3107 }
3108 }
3109 }
3110
3111 static void
3112 zen_umc_fill_chan_hyb_lpddr5(zen_umc_chan_t *chan, uint_t mstate)
3113 {
3114 const uint32_t reg = chan->chan_dramcfg_raw[mstate];
3115 const uint32_t wck = UMC_DRAMCFG_HYB_GET_WCLKRATIO(reg);
3116 const uint32_t clock = UMC_DRAMCFG_HYB_GET_MEMCLK(reg);
3117 boolean_t twox;
3118
3119 switch (wck) {
3120 case UMC_DRAMCFG_WCLKRATIO_1TO2:
3121 twox = B_TRUE;
3122 break;
3123 case UMC_DRAMCFG_WCLKRATIO_1TO4:
3124 twox = B_FALSE;
3125 break;
3126 default:
3127 return;
3128 }
3129
3130 for (size_t i = 0; i < ARRAY_SIZE(zen_umc_lpddr5_map); i++) {
3131 if (clock == zen_umc_lpddr5_map[i].zufm_reg) {
3132 chan->chan_clock[mstate] =
3133 zen_umc_lpddr5_map[i].zufm_mhz;
3134
3135 if (twox) {
3136 chan->chan_speed[mstate] =
3137 zen_umc_lpddr5_map[i].zufm_mts2;
3138 } else {
3139 chan->chan_speed[mstate] =
3140 zen_umc_lpddr5_map[i].zufm_mts4;
3141 }
3142 break;
3143 }
3144 }
3145 }
3146
3147 /*
3148 * Determine the current operating frequency of the channel. This varies based
3149 * upon the type of UMC that we're operating on as there are multiple ways to
3150 * determine this. There are up to four memory P-states that exist in the UMC.
3151 * This grabs it for a single P-state at a time.
3152 *
3153 * Unlike most other failures, if we cannot determine the clock frequency or
3154 * transfer speed, we do not consider this fatal because it does not stop
3155 * decoding. It only means that we cannot provide a useful bit of information
3156 * to topo.
3157 */
3158 static void
3159 zen_umc_fill_chan_freq(zen_umc_t *umc, zen_umc_chan_t *chan, uint_t mstate)
3160 {
3161 const uint32_t cfg = chan->chan_dramcfg_raw[mstate];
3162 const umc_dimm_type_t dimm_type = chan->chan_type;
3163
3164 switch (umc->umc_fdata->zufd_umc_style) {
3165 case ZEN_UMC_UMC_S_HYBRID_LPDDR5:
3166 if (dimm_type == UMC_DIMM_T_LPDDR5) {
3167 zen_umc_fill_chan_hyb_lpddr5(chan, mstate);
3168 } else if (dimm_type != UMC_DIMM_T_LPDDR4) {
3169 zen_umc_fill_chan_ddr4(chan, mstate,
3170 UMC_DRAMCFG_HYB_GET_MEMCLK(cfg));
3171 }
3172 break;
3173 case ZEN_UMC_UMC_S_DDR4:
3174 case ZEN_UMC_UMC_S_DDR4_APU:
3175 zen_umc_fill_chan_ddr4(chan, mstate,
3176 UMC_DRAMCFG_DDR4_GET_MEMCLK(cfg));
3177 break;
3178 case ZEN_UMC_UMC_S_DDR5:
3179 case ZEN_UMC_UMC_S_DDR5_APU:
3180 chan->chan_clock[mstate] = UMC_DRAMCFG_DDR5_GET_MEMCLK(cfg);
3181 if (dimm_type == UMC_DIMM_T_DDR5) {
3182 chan->chan_speed[mstate] = 2 * chan->chan_clock[mstate];
3183 } else if (dimm_type == UMC_DIMM_T_LPDDR5) {
3184 switch (UMC_DRAMCFG_LPDDR5_GET_WCKRATIO(cfg)) {
3185 case UMC_DRAMCFG_WCLKRATIO_1TO2:
3186 chan->chan_speed[mstate] = 2 *
3187 chan->chan_clock[mstate];
3188 break;
3189 case UMC_DRAMCFG_WCLKRATIO_1TO4:
3190 chan->chan_speed[mstate] = 4 *
3191 chan->chan_clock[mstate];
3192 break;
3193 default:
3194 break;
3195 }
3196 }
3197 break;
3198 }
3199 }
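
/*
 * A brief worked example of the DDR5 path above (values illustrative):
 * a DDR5 channel whose DRAM configuration register reports a 2400 MHz
 * memory clock transfers data on both clock edges, so we record
 * chan_speed = 2 * 2400 = 4800 MT/s (i.e. DDR5-4800). An LPDDR5 device
 * at the same clock with a 1:4 WCK ratio would instead be recorded as
 * 4 * 2400 = 9600 MT/s.
 */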
3200
3201 /*
3202 * Fill common channel information. While the locations of many of the registers
3203 * changed between the DDR4-capable and DDR5-capable devices, the actual
3204 * contents are the same so we process them together.
3205 */
3206 static boolean_t
3207 zen_umc_fill_chan_hash(zen_umc_t *umc, zen_umc_df_t *df, zen_umc_chan_t *chan,
3208 boolean_t ddr4)
3209 {
3210 int ret;
3211 smn_reg_t reg;
3212 uint32_t val;
3213
3214 const umc_chan_hash_flags_t flags = umc->umc_fdata->zufd_chan_hash;
3215 const uint32_t id = chan->chan_logid;
3216 umc_chan_hash_t *chash = &chan->chan_hash;
3217 chash->uch_flags = flags;
3218
3219 if ((flags & UMC_CHAN_HASH_F_BANK) != 0) {
3220 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_BANK_HASH; i++) {
3221 umc_bank_hash_t *bank = &chash->uch_bank_hashes[i];
3222
3223 if (ddr4) {
3224 reg = UMC_BANK_HASH_DDR4(id, i);
3225 } else {
3226 reg = UMC_BANK_HASH_DDR5(id, i);
3227 }
3228
3229 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg,
3230 &val)) != 0) {
3231 dev_err(umc->umc_dip, CE_WARN, "failed to read "
3232 "bank hash register %x: %d",
3233 SMN_REG_ADDR(reg), ret);
3234 return (B_FALSE);
3235 }
3236
3237 bank->ubh_row_xor = UMC_BANK_HASH_GET_ROW(val);
3238 bank->ubh_col_xor = UMC_BANK_HASH_GET_COL(val);
3239 bank->ubh_en = UMC_BANK_HASH_GET_EN(val);
3240 }
3241 }
3242
3243 if ((flags & UMC_CHAN_HASH_F_RM) != 0) {
3244 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_RM_HASH; i++) {
3245 uint64_t addr;
3246 umc_addr_hash_t *rm = &chash->uch_rm_hashes[i];
3247
3248 if (ddr4) {
3249 reg = UMC_RANK_HASH_DDR4(id, i);
3250 } else {
3251 reg = UMC_RANK_HASH_DDR5(id, i);
3252 }
3253
3254 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg,
3255 &val)) != 0) {
3256 dev_err(umc->umc_dip, CE_WARN, "failed to read "
3257 "rm hash register %x: %d",
3258 SMN_REG_ADDR(reg), ret);
3259 return (B_FALSE);
3260 }
3261
3262 addr = UMC_RANK_HASH_GET_ADDR(val);
3263 rm->uah_addr_xor = addr << UMC_RANK_HASH_SHIFT;
3264 rm->uah_en = UMC_RANK_HASH_GET_EN(val);
3265
3266 if (ddr4 || (umc->umc_fdata->zufd_flags &
3267 ZEN_UMC_FAM_F_UMC_EADDR) == 0) {
3268 continue;
3269 }
3270
3271 reg = UMC_RANK_HASH_EXT_DDR5(id, i);
3272 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg,
3273 &val)) != 0) {
3274 dev_err(umc->umc_dip, CE_WARN, "failed to read "
3275 "rm hash ext register %x: %d",
3276 SMN_REG_ADDR(reg), ret);
3277 return (B_FALSE);
3278 }
3279
3280 addr = UMC_RANK_HASH_EXT_GET_ADDR(val);
3281 rm->uah_addr_xor |= addr <<
3282 UMC_RANK_HASH_EXT_ADDR_SHIFT;
3283 }
3284 }
3285
3286 if ((flags & UMC_CHAN_HASH_F_PC) != 0) {
3287 umc_pc_hash_t *pc = &chash->uch_pc_hash;
3288
3289 if (ddr4) {
3290 reg = UMC_PC_HASH_DDR4(id);
3291 } else {
3292 reg = UMC_PC_HASH_DDR5(id);
3293 }
3294
3295 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3296 dev_err(umc->umc_dip, CE_WARN, "failed to read pc hash "
3297 "register %x: %d", SMN_REG_ADDR(reg), ret);
3298 return (B_FALSE);
3299 }
3300
3301 pc->uph_row_xor = UMC_PC_HASH_GET_ROW(val);
3302 pc->uph_col_xor = UMC_PC_HASH_GET_COL(val);
3303 pc->uph_en = UMC_PC_HASH_GET_EN(val);
3304
3305 if (ddr4) {
3306 reg = UMC_PC_HASH2_DDR4(id);
3307 } else {
3308 reg = UMC_PC_HASH2_DDR5(id);
3309 }
3310
3311 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3312 dev_err(umc->umc_dip, CE_WARN, "failed to read pc hash "
3313 "2 register %x: %d", SMN_REG_ADDR(reg), ret);
3314 return (B_FALSE);
3315 }
3316
3317 pc->uph_bank_xor = UMC_PC_HASH2_GET_BANK(val);
3318 }
3319
3320 if ((flags & UMC_CHAN_HASH_F_CS) != 0) {
3321 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_CS_HASH; i++) {
3322 uint64_t addr;
3323 umc_addr_hash_t *rm = &chash->uch_cs_hashes[i];
3324
3325 if (ddr4) {
3326 reg = UMC_CS_HASH_DDR4(id, i);
3327 } else {
3328 reg = UMC_CS_HASH_DDR5(id, i);
3329 }
3330
3331 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg,
3332 &val)) != 0) {
3333 dev_err(umc->umc_dip, CE_WARN, "failed to read "
3334 "cs hash register %x: %d", SMN_REG_ADDR(reg), ret);
3335 return (B_FALSE);
3336 }
3337
3338 addr = UMC_CS_HASH_GET_ADDR(val);
3339 rm->uah_addr_xor = addr << UMC_CS_HASH_SHIFT;
3340 rm->uah_en = UMC_CS_HASH_GET_EN(val);
3341
3342 if (ddr4 || (umc->umc_fdata->zufd_flags &
3343 ZEN_UMC_FAM_F_UMC_EADDR) == 0) {
3344 continue;
3345 }
3346
3347 reg = UMC_CS_HASH_EXT_DDR5(id, i);
3348 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg,
3349 &val)) != 0) {
3350 dev_err(umc->umc_dip, CE_WARN, "failed to read "
3351 "cs hash ext register %x: %d",
3352 SMN_REG_ADDR(reg), ret);
3353 return (B_FALSE);
3354 }
3355
3356 addr = UMC_CS_HASH_EXT_GET_ADDR(val);
3357 rm->uah_addr_xor |= addr << UMC_CS_HASH_EXT_ADDR_SHIFT;
3358 }
3359 }
3360
3361 return (B_TRUE);
3362 }
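
/*
 * To illustrate how the XOR masks gathered above are generally consumed
 * during decode: the masked portion of the address is folded down to a
 * single parity bit, which is then XORed into the bank/rm/cs bit in
 * question when the hash is enabled. A minimal sketch of that folding
 * (hypothetical helper; the decode source is authoritative):
 *
 *	uint8_t
 *	addr_hash_parity(uint64_t addr, uint64_t xor_mask)
 *	{
 *		uint64_t v = addr & xor_mask;
 *
 *		v ^= v >> 32;
 *		v ^= v >> 16;
 *		v ^= v >> 8;
 *		v ^= v >> 4;
 *		v ^= v >> 2;
 *		v ^= v >> 1;
 *		return (v & 1);
 *	}
 */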
3363
3364 /*
3365 * This fills in settings that we care about which are valid for the entire
3366 * channel and are the same between DDR4/5 capable devices.
3367 */
3368 static boolean_t
3369 zen_umc_fill_chan(zen_umc_t *umc, zen_umc_df_t *df, zen_umc_chan_t *chan)
3370 {
3371 uint32_t val;
3372 smn_reg_t reg;
3373 const uint32_t id = chan->chan_logid;
3374 int ret;
3375 boolean_t ddr4;
3376
3377 if (umc->umc_fdata->zufd_umc_style == ZEN_UMC_UMC_S_DDR4 ||
3378 umc->umc_fdata->zufd_umc_style == ZEN_UMC_UMC_S_DDR4_APU) {
3379 ddr4 = B_TRUE;
3380 } else {
3381 ddr4 = B_FALSE;
3382 }
3383
3384 /*
3385 * Begin by gathering all of the information related to hashing. What is
3386 * valid here varies based on the actual chip family, and the registers
3387 * themselves vary between DDR4 and DDR5.
3388 */
3389 if (!zen_umc_fill_chan_hash(umc, df, chan, ddr4)) {
3390 return (B_FALSE);
3391 }
3392
3393 reg = UMC_UMCCFG(id);
3394 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3395 dev_err(umc->umc_dip, CE_WARN, "failed to read UMC "
3396 "configuration register %x: %d", SMN_REG_ADDR(reg), ret);
3397 return (B_FALSE);
3398 }
3399
3400 chan->chan_umccfg_raw = val;
3401 if (UMC_UMCCFG_GET_ECC_EN(val)) {
3402 chan->chan_flags |= UMC_CHAN_F_ECC_EN;
3403 }
3404
3405 /*
3406 * The UMC configuration register contains information to determine the
3407 * type of DIMM. All DIMMs in the channel must be the same type, so we
3408 * leave this setting on the channel. Once we have that, we read the
3409 * DRAM configuration register for each memory P-state, which is used
3410 * to determine the frequency and transfer speed of the memory channel.
3411 */
3419 zen_umc_fill_ddr_type(umc, chan);
3420 for (uint_t i = 0; i < ZEN_UMC_NMEM_PSTATES; i++) {
3421 chan->chan_clock[i] = ZEN_UMC_UNKNOWN_FREQ;
3422 chan->chan_speed[i] = ZEN_UMC_UNKNOWN_FREQ;
3423
3424 reg = UMC_DRAMCFG(id, i);
3425 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3426 dev_err(umc->umc_dip, CE_WARN, "failed to read DRAM "
3427 "Configuration register P-state %u %x: %d", i,
3428 SMN_REG_ADDR(reg), ret);
3429 return (B_FALSE);
3430 }
3431 chan->chan_dramcfg_raw[i] = val;
3432
3433 zen_umc_fill_chan_freq(umc, chan, i);
3434 }
3435
3436 /*
3437 * Grab data that we can use to determine if we're scrambling or
3438 * encrypting regions of memory.
3439 */
3440 reg = UMC_DATACTL(id);
3441 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3442 dev_err(umc->umc_dip, CE_WARN, "failed to read data control "
3443 "register %x: %d", SMN_REG_ADDR(reg), ret);
3444 return (B_FALSE);
3445 }
3446 chan->chan_datactl_raw = val;
3447 if (UMC_DATACTL_GET_SCRAM_EN(val)) {
3448 chan->chan_flags |= UMC_CHAN_F_SCRAMBLE_EN;
3449 }
3450
3451 if (UMC_DATACTL_GET_ENCR_EN(val)) {
3452 chan->chan_flags |= UMC_CHAN_F_ENCR_EN;
3453 }
3454
3455 /*
3456 * At the moment we snapshot the raw ECC control information. When we do
3457 * further work of making this a part of the MCA/X decoding, we'll want
3458 * to further take this apart for syndrome decoding. Until then, simply
3459 * cache it for future us and observability.
3460 */
3461 reg = UMC_ECCCTL(id);
3462 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3463 dev_err(umc->umc_dip, CE_WARN, "failed to read ECC control "
3464 "register %x: %d", SMN_REG_ADDR(reg), ret);
3465 return (B_FALSE);
3466 }
3467 chan->chan_eccctl_raw = val;
3468
3469 /*
3470 * Read and snapshot the UMC capability registers for debugging in the
3471 * future.
3472 */
3473 reg = UMC_UMCCAP(id);
3474 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3475 dev_err(umc->umc_dip, CE_WARN, "failed to read UMC cap "
3476 "register %x: %d", SMN_REG_ADDR(reg), ret);
3477 return (B_FALSE);
3478 }
3479 chan->chan_umccap_raw = val;
3480
3481 reg = UMC_UMCCAP_HI(id);
3482 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3483 dev_err(umc->umc_dip, CE_WARN, "failed to read UMC cap high "
3484 "register %x: %d", SMN_REG_ADDR(reg), ret);
3485 return (B_FALSE);
3486 }
3487 chan->chan_umccap_hi_raw = val;
3488
3489 return (B_TRUE);
3490 }
3491
3492 static int
3493 zen_umc_fill_umc_cb(const uint_t dfno, const uint32_t fabid,
3494 const uint32_t instid, void *arg)
3495 {
3496 zen_umc_t *umc = arg;
3497 zen_umc_df_t *df = &umc->umc_dfs[dfno];
3498 zen_umc_chan_t *chan = &df->zud_chan[df->zud_nchan];
3499
3500 df->zud_nchan++;
3501 VERIFY3U(df->zud_nchan, <=, ZEN_UMC_MAX_UMCS);
3502
3503 /*
3504 * The data fabric is generally organized such that all UMC entries
3505 * should be contiguous in their fabric ID space; however, we don't
3506 * want to rely on specific ID locations. The UMC SMN addresses are
3507 * organized in a relative order. To determine the SMN ID to use (the
3508 * chan_logid) we assume the iteration order will always be from the
3509 * lowest Instance ID to the highest Instance ID. But using the
3510 * iteration index is not enough as there's still an unstated assumption
3511 * that we'll encounter all the UMCs -- even those with no DIMMs
3512 * populated. While this previously seemed like a reasonable assumption
3513 * (every system in question behaved as such), it is seemingly no longer
3514 * always the case:
3515 *
3516 * On a 12-channel SP5 system (running either Genoa or Turin), the DF
3517 * reports 16 CS entities (of which 12 should be the UMCs). But with
3518 * DIMMs only in channels A and G (each of which are mapped to different
3519 * UMCs and not necessarily in alphabetic order), we only discover
3520 * 2 UMCs and so end up with something like:
3521 * zud_nchan = 2
3522 * zud_chan[0].chan_instid = 3 // (A)
3523 * zud_chan[1].chan_instid = 9 // (G)
3524 *
3525 * Attempting to use the logical zud_chan index (0/1) as the SMN ID
3526 * for the UMC registers returns misleading results, e.g., our DIMM
3527 * presence check claims there are none whereas using the Instance
3528 * IDs (3/9) returns the correct results.
3529 *
3530 * Taking that all into account, we arrive at
3531 * chan_logid = chan_instid - <Base UMC Instance ID>
3532 *
3533 * Unfortunately though, there's no way to determine what that base ID
3534 * should be programmatically and so we hardcode it as part of the
3535 * static per SoC family data.
3536 */
3537 chan->chan_logid = instid - umc->umc_fdata->zufd_base_instid;
3538 chan->chan_fabid = fabid;
3539 chan->chan_instid = instid;
3540 chan->chan_nrules = umc->umc_fdata->zufd_cs_nrules;
3541 for (uint_t i = 0; i < umc->umc_fdata->zufd_cs_nrules; i++) {
3542 if (zen_umc_read_dram_rule(umc, dfno, instid, i,
3543 &chan->chan_rules[i]) != 0) {
3544 return (-1);
3545 }
3546 }
3547
3548 for (uint_t i = 0; i < umc->umc_fdata->zufd_cs_nrules - 1; i++) {
3549 int ret;
3550 uint32_t offset;
3551 uint64_t t;
3552 df_reg_def_t off_reg;
3553 chan_offset_t *offp = &chan->chan_offsets[i];
3554
3555 switch (umc->umc_df_rev) {
3556 case DF_REV_2:
3557 case DF_REV_3:
3558 case DF_REV_3P5:
3559 ASSERT3U(i, ==, 0);
3560 off_reg = DF_DRAM_OFFSET_V2;
3561 break;
3562 case DF_REV_4:
3563 case DF_REV_4D2:
3564 off_reg = DF_DRAM_OFFSET_V4(i);
3565 break;
3566 default:
3567 dev_err(umc->umc_dip, CE_WARN, "!encountered "
3568 "unsupported DF revision processing DRAM Offsets: "
3569 "0x%x", umc->umc_df_rev);
3570 return (-1);
3571 }
3572
3573 if ((ret = amdzen_c_df_read32(dfno, instid, off_reg,
3574 &offset)) != 0) {
3575 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM "
3576 "offset %u on 0x%x/0x%x: %d", i, dfno, instid, ret);
3577 return (-1);
3578 }
3579
3580 offp->cho_raw = offset;
3581 offp->cho_valid = DF_DRAM_OFFSET_GET_EN(offset);
3582
3583 switch (umc->umc_df_rev) {
3584 case DF_REV_2:
3585 t = DF_DRAM_OFFSET_V2_GET_OFFSET(offset);
3586 break;
3587 case DF_REV_3:
3588 case DF_REV_3P5:
3589 t = DF_DRAM_OFFSET_V3_GET_OFFSET(offset);
3590 break;
3591 case DF_REV_4:
3592 case DF_REV_4D2:
3593 t = DF_DRAM_OFFSET_V4_GET_OFFSET(offset);
3594 break;
3595 default:
3596 dev_err(umc->umc_dip, CE_WARN, "!encountered "
3597 "unsupported DF revision processing DRAM Offsets: "
3598 "0x%x", umc->umc_df_rev);
3599 return (-1);
3600 }
3601 offp->cho_offset = t << DF_DRAM_OFFSET_SHIFT;
3602 }
3603
3604 /*
3605 * If this platform supports our favorite Zen 3 6-channel hash special
3606 * then we need to grab the NP2 configuration registers. This will only
3607 * be referenced if this channel is actually being used for a 6-channel
3608 * hash, so even if the contents are weird that should still be ok.
3609 */
3610 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_NP2) != 0) {
3611 uint32_t np2;
3612 int ret;
3613
3614 if ((ret = amdzen_c_df_read32(dfno, instid, DF_NP2_CONFIG_V3,
3615 &np2)) != 0) {
3616 dev_err(umc->umc_dip, CE_WARN, "!failed to read NP2 "
3617 "config: %d", ret);
3618 return (-1);
3619 }
3620
3621 chan->chan_np2_raw = np2;
3622 chan->chan_np2_space0 = DF_NP2_CONFIG_V3_GET_SPACE0(np2);
3623 }
3624
3625 /*
3626 * Now that we have everything we need from the data fabric, read out
3627 * the rest of what we need from the UMC channel data in SMN register
3628 * space.
3629 */
3630 switch (umc->umc_fdata->zufd_umc_style) {
3631 case ZEN_UMC_UMC_S_DDR4:
3632 case ZEN_UMC_UMC_S_DDR4_APU:
3633 for (uint_t i = 0; i < ZEN_UMC_MAX_DIMMS; i++) {
3634 if (!zen_umc_fill_chan_dimm_ddr4(umc, df, chan, i)) {
3635 return (-1);
3636 }
3637 }
3638 break;
3639 case ZEN_UMC_UMC_S_HYBRID_LPDDR5:
3640 case ZEN_UMC_UMC_S_DDR5:
3641 case ZEN_UMC_UMC_S_DDR5_APU:
3642 for (uint_t i = 0; i < ZEN_UMC_MAX_DIMMS; i++) {
3643 for (uint_t r = 0; r < ZEN_UMC_MAX_CS_PER_DIMM; r++) {
3644 if (!zen_umc_fill_chan_rank_ddr5(umc, df, chan,
3645 i, r)) {
3646 return (-1);
3647 }
3648 }
3649 }
3650 break;
3651 default:
3652 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported "
3653 "UMC style: 0x%x", umc->umc_fdata->zufd_umc_style);
3654 return (-1);
3655 }
3656
3657 if (!zen_umc_fill_chan(umc, df, chan)) {
3658 return (-1);
3659 }
3660
3661 return (0);
3662 }
3663
3664 /*
3665 * Today there are no privilege requirements for the memory controller
3666 * information; it is restricted based on file system permissions.
3667 */
3668 static int
3669 zen_umc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
3670 {
3671 zen_umc_t *umc = zen_umc;
3672
3673 if ((flag & (FEXCL | FNDELAY | FNONBLOCK | FWRITE)) != 0) {
3674 return (EINVAL);
3675 }
3676
3677 if (otyp != OTYP_CHR) {
3678 return (EINVAL);
3679 }
3680
3681 if (getminor(*devp) >= umc->umc_ndfs) {
3682 return (ENXIO);
3683 }
3684
3685 return (0);
3686 }
3687
3688 static void
3689 zen_umc_ioctl_decode(zen_umc_t *umc, mc_encode_ioc_t *encode)
3690 {
3691 zen_umc_decoder_t dec;
3692 uint32_t sock, die, comp;
3693
3694 bzero(&dec, sizeof (dec));
3695 if (!zen_umc_decode_pa(umc, encode->mcei_pa, &dec)) {
3696 encode->mcei_err = (uint32_t)dec.dec_fail;
3697 encode->mcei_errdata = dec.dec_fail_data;
3698 return;
3699 }
3700
3701 encode->mcei_errdata = 0;
3702 encode->mcei_err = 0;
3703 encode->mcei_chan_addr = dec.dec_norm_addr;
3704 encode->mcei_rank_addr = UINT64_MAX;
3705 encode->mcei_board = 0;
3706 zen_fabric_id_decompose(&umc->umc_decomp, dec.dec_targ_fabid, &sock,
3707 &die, &comp);
3708 encode->mcei_chip = sock;
3709 encode->mcei_die = die;
3710 encode->mcei_mc = dec.dec_umc_chan->chan_logid;
3711 encode->mcei_chan = 0;
3712 encode->mcei_dimm = dec.dec_dimm_no;
3713 encode->mcei_row = dec.dec_dimm_row;
3714 encode->mcei_column = dec.dec_dimm_col;
3715 /*
3716 * We don't have a logical rank that something matches to; we have the
3717 * actual chip-select and rank multiplication. If we could figure out
3718 * how to transform that into an actual rank, that'd be grand.
3719 */
3720 encode->mcei_rank = UINT8_MAX;
3721 encode->mcei_cs = dec.dec_dimm_csno;
3722 encode->mcei_rm = dec.dec_dimm_rm;
3723 encode->mcei_bank = dec.dec_dimm_bank;
3724 encode->mcei_bank_group = dec.dec_dimm_bank_group;
3725 encode->mcei_subchan = dec.dec_dimm_subchan;
3726 }
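
/*
 * For reference, a userland consumer would exercise the decode ioctl
 * roughly as follows (a sketch: the file descriptor setup and error
 * handling are elided, and the fields shown are a subset):
 *
 *	mc_encode_ioc_t enc;
 *
 *	bzero(&enc, sizeof (enc));
 *	enc.mcei_pa = pa;
 *	if (ioctl(fd, MC_IOC_DECODE_PA, &enc) == 0 && enc.mcei_err == 0) {
 *		(void) printf("socket %u, channel %u, dimm %u\n",
 *		    enc.mcei_chip, enc.mcei_mc, enc.mcei_dimm);
 *	}
 */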
3727
3728 static void
3729 umc_decoder_pack(zen_umc_t *umc)
3730 {
3731 char *buf = NULL;
3732 size_t len = 0;
3733
3734 ASSERT(MUTEX_HELD(&umc->umc_nvl_lock));
3735 if (umc->umc_decoder_buf != NULL) {
3736 return;
3737 }
3738
3739 if (umc->umc_decoder_nvl == NULL) {
3740 umc->umc_decoder_nvl = zen_umc_dump_decoder(umc);
3741 if (umc->umc_decoder_nvl == NULL) {
3742 return;
3743 }
3744 }
3745
3746 if (nvlist_pack(umc->umc_decoder_nvl, &buf, &len, NV_ENCODE_XDR,
3747 KM_NOSLEEP_LAZY) != 0) {
3748 return;
3749 }
3750
3751 umc->umc_decoder_buf = buf;
3752 umc->umc_decoder_len = len;
3753 }
3754
3755 static int
3756 zen_umc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
3757 int *rvalp)
3758 {
3759 int ret;
3760 zen_umc_t *umc = zen_umc;
3761 mc_encode_ioc_t encode;
3762 mc_snapshot_info_t info;
3763
3764 if (getminor(dev) >= umc->umc_ndfs) {
3765 return (ENXIO);
3766 }
3767
3768 switch (cmd) {
3769 case MC_IOC_DECODE_PA:
3770 if (crgetzoneid(credp) != GLOBAL_ZONEID ||
3771 drv_priv(credp) != 0) {
3772 ret = EPERM;
3773 break;
3774 }
3775
3776 if (ddi_copyin((void *)arg, &encode, sizeof (encode),
3777 mode & FKIOCTL) != 0) {
3778 ret = EFAULT;
3779 break;
3780 }
3781
3782 zen_umc_ioctl_decode(umc, &encode);
3783 ret = 0;
3784
3785 if (ddi_copyout(&encode, (void *)arg, sizeof (encode),
3786 mode & FKIOCTL) != 0) {
3787 ret = EFAULT;
3788 break;
3789 }
3790 break;
3791 case MC_IOC_DECODE_SNAPSHOT_INFO:
3792 mutex_enter(&umc->umc_nvl_lock);
3793 umc_decoder_pack(umc);
3794
3795 if (umc->umc_decoder_buf == NULL) {
3796 mutex_exit(&umc->umc_nvl_lock);
3797 ret = EIO;
3798 break;
3799 }
3800
3801 if (umc->umc_decoder_len > UINT32_MAX) {
3802 mutex_exit(&umc->umc_nvl_lock);
3803 ret = EOVERFLOW;
3804 break;
3805 }
3806
3807 info.mcs_size = umc->umc_decoder_len;
3808 info.mcs_gen = 0;
3809 if (ddi_copyout(&info, (void *)arg, sizeof (info),
3810 mode & FKIOCTL) != 0) {
3811 mutex_exit(&umc->umc_nvl_lock);
3812 ret = EFAULT;
3813 break;
3814 }
3815
3816 mutex_exit(&umc->umc_nvl_lock);
3817 ret = 0;
3818 break;
3819 case MC_IOC_DECODE_SNAPSHOT:
3820 mutex_enter(&umc->umc_nvl_lock);
3821 umc_decoder_pack(umc);
3822
3823 if (umc->umc_decoder_buf == NULL) {
3824 mutex_exit(&umc->umc_nvl_lock);
3825 ret = EIO;
3826 break;
3827 }
3828
3829 if (ddi_copyout(umc->umc_decoder_buf, (void *)arg,
3830 umc->umc_decoder_len, mode & FKIOCTL) != 0) {
3831 mutex_exit(&umc->umc_nvl_lock);
3832 ret = EFAULT;
3833 break;
3834 }
3835
3836 mutex_exit(&umc->umc_nvl_lock);
3837 ret = 0;
3838 break;
3839 default:
3840 ret = ENOTTY;
3841 break;
3842 }
3843
3844 return (ret);
3845 }
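
/*
 * The snapshot ioctls above form a simple two-step protocol: userland
 * first asks for the size of the packed nvlist and then fetches the
 * bytes themselves. A sketch of a consumer (error handling elided; fd
 * is an open descriptor on one of our minor nodes):
 *
 *	mc_snapshot_info_t info;
 *	char *buf;
 *	nvlist_t *nvl;
 *
 *	if (ioctl(fd, MC_IOC_DECODE_SNAPSHOT_INFO, &info) != 0)
 *		return;
 *	buf = malloc(info.mcs_size);
 *	if (ioctl(fd, MC_IOC_DECODE_SNAPSHOT, buf) == 0)
 *		(void) nvlist_unpack(buf, info.mcs_size, &nvl, 0);
 *	free(buf);
 */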
3846
3847 static int
3848 zen_umc_close(dev_t dev, int flag, int otyp, cred_t *credp)
3849 {
3850 return (0);
3851 }
3852
3853 static void
3854 zen_umc_cleanup(zen_umc_t *umc)
3855 {
3856 nvlist_free(umc->umc_decoder_nvl);
3857 umc->umc_decoder_nvl = NULL;
3858 if (umc->umc_decoder_buf != NULL) {
3859 kmem_free(umc->umc_decoder_buf, umc->umc_decoder_len);
3860 umc->umc_decoder_buf = NULL;
3861 umc->umc_decoder_len = 0;
3862 }
3863
3864 if (umc->umc_dip != NULL) {
3865 ddi_remove_minor_node(umc->umc_dip, NULL);
3866 }
3867 mutex_destroy(&umc->umc_nvl_lock);
3868 kmem_free(umc, sizeof (zen_umc_t));
3869 }
3870
3871 static int
3872 zen_umc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
3873 {
3874 int ret;
3875 zen_umc_t *umc;
3876
3877 if (cmd == DDI_RESUME) {
3878 return (DDI_SUCCESS);
3879 } else if (cmd != DDI_ATTACH) {
3880 return (DDI_FAILURE);
3881 }
3882 if (zen_umc != NULL) {
3883 dev_err(dip, CE_WARN, "!zen_umc is already attached to a "
3884 "dev_info_t: %p", zen_umc->umc_dip);
3885 return (DDI_FAILURE);
3886 }
3887
3888 /*
3889 * To get us going, we need to do several bits of set up. First, we need
3890 * to use the knowledge about the actual hardware that we're using to
3891 * encode a bunch of different data:
3892 *
3893 * o The set of register styles and extra hardware features that exist
3894 * on the hardware platform.
3895 * o The number of actual rules there are for the CCMs and UMCs.
3896 * o How many actual things exist (DFs, etc.)
3897 * o Useful fabric and instance IDs for all of the different UMC
3898 * entries so we can actually talk to them.
3899 *
3900 * Only once we have all the above will we go dig into the actual data.
3901 */
3902 umc = kmem_zalloc(sizeof (zen_umc_t), KM_SLEEP);
3903 mutex_init(&umc->umc_nvl_lock, NULL, MUTEX_DRIVER, NULL);
3904 umc->umc_family = chiprev_family(cpuid_getchiprev(CPU));
3905 umc->umc_ndfs = amdzen_c_df_count();
3906 umc->umc_dip = dip;
3907
3908 if (!zen_umc_identify(umc)) {
3909 dev_err(dip, CE_WARN, "!encountered unsupported CPU");
3910 goto err;
3911 }
3912
3913 umc->umc_df_rev = amdzen_c_df_rev();
3914 switch (umc->umc_df_rev) {
3915 case DF_REV_2:
3916 case DF_REV_3:
3917 case DF_REV_3P5:
3918 case DF_REV_4:
3919 case DF_REV_4D2:
3920 break;
3921 default:
3922 dev_err(dip, CE_WARN, "!encountered unknown DF revision: %x",
3923 umc->umc_df_rev);
3924 goto err;
3925 }
3926
3927 if ((ret = amdzen_c_df_fabric_decomp(&umc->umc_decomp)) != 0) {
3928 dev_err(dip, CE_WARN, "!failed to get fabric decomposition: %d", ret);
3929 goto err;
3930 }
3931
3932 umc->umc_tom = rdmsr(MSR_AMD_TOM);
3933 umc->umc_tom2 = rdmsr(MSR_AMD_TOM2);
3934
3935 /*
3936 * For each DF, start by reading all of the data that we need from it.
3937 * This involves finding a target CCM, reading all of the rules,
3938 * ancillary settings, and related. Then we'll do a pass over all of the
3939 * actual UMC targets there.
3940 */
3941 for (uint_t i = 0; i < umc->umc_ndfs; i++) {
3942 if (amdzen_c_df_iter(i, ZEN_DF_TYPE_CCM_CPU,
3943 zen_umc_fill_ccm_cb, umc) < 0 ||
3944 amdzen_c_df_iter(i, ZEN_DF_TYPE_CS_UMC, zen_umc_fill_umc_cb,
3945 umc) != 0) {
3946 goto err;
3947 }
3948 }
3949
3950 /*
3951 * Create a minor node for each df that we encounter.
3952 */
3953 for (uint_t i = 0; i < umc->umc_ndfs; i++) {
3954 int ret;
3955 char minor[64];
3956
3957 (void) snprintf(minor, sizeof (minor), "mc-umc-%u", i);
3958 if ((ret = ddi_create_minor_node(umc->umc_dip, minor, S_IFCHR,
3959 i, "ddi_mem_ctrl", 0)) != 0) {
3960 dev_err(dip, CE_WARN, "!failed to create minor %s: %d",
3961 minor, ret);
3962 goto err;
3963 }
3964 }
3965
3966 zen_umc = umc;
3967 return (DDI_SUCCESS);
3968
3969 err:
3970 zen_umc_cleanup(umc);
3971 return (DDI_FAILURE);
3972 }
3973
3974 static int
3975 zen_umc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
3976 {
3977 zen_umc_t *umc;
3978
3979 if (zen_umc == NULL || zen_umc->umc_dip == NULL) {
3980 return (DDI_FAILURE);
3981 }
3982 umc = zen_umc;
3983
3984 switch (cmd) {
3985 case DDI_INFO_DEVT2DEVINFO:
3986 *resultp = (void *)umc->umc_dip;
3987 break;
3988 case DDI_INFO_DEVT2INSTANCE:
3989 *resultp = (void *)(uintptr_t)ddi_get_instance(
3990 umc->umc_dip);
3991 break;
3992 default:
3993 return (DDI_FAILURE);
3994 }
3995 return (DDI_SUCCESS);
3996 }
3997
3998 static int
3999 zen_umc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4000 {
4001 zen_umc_t *umc;
4002
4003 if (cmd == DDI_SUSPEND) {
4004 return (DDI_SUCCESS);
4005 } else if (cmd != DDI_DETACH) {
4006 return (DDI_FAILURE);
4007 }
4008
4009 if (zen_umc == NULL) {
4010 dev_err(dip, CE_WARN, "!asked to detach zen_umc, but it "
4011 "was never successfully attached");
4012 return (DDI_FAILURE);
4013 }
4014
4015 umc = zen_umc;
4016 zen_umc = NULL;
4017 zen_umc_cleanup(umc);
4018 return (DDI_SUCCESS);
4019 }
4020
4021 static struct cb_ops zen_umc_cb_ops = {
4022 .cb_open = zen_umc_open,
4023 .cb_close = zen_umc_close,
4024 .cb_strategy = nodev,
4025 .cb_print = nodev,
4026 .cb_dump = nodev,
4027 .cb_read = nodev,
4028 .cb_write = nodev,
4029 .cb_ioctl = zen_umc_ioctl,
4030 .cb_devmap = nodev,
4031 .cb_mmap = nodev,
4032 .cb_segmap = nodev,
4033 .cb_chpoll = nochpoll,
4034 .cb_prop_op = ddi_prop_op,
4035 .cb_flag = D_MP,
4036 .cb_rev = CB_REV,
4037 .cb_aread = nodev,
4038 .cb_awrite = nodev
4039 };
4040
4041 static struct dev_ops zen_umc_dev_ops = {
4042 .devo_rev = DEVO_REV,
4043 .devo_refcnt = 0,
4044 .devo_getinfo = zen_umc_getinfo,
4045 .devo_identify = nulldev,
4046 .devo_probe = nulldev,
4047 .devo_attach = zen_umc_attach,
4048 .devo_detach = zen_umc_detach,
4049 .devo_reset = nodev,
4050 .devo_quiesce = ddi_quiesce_not_needed,
4051 .devo_cb_ops = &zen_umc_cb_ops
4052 };
4053
4054 static struct modldrv zen_umc_modldrv = {
4055 .drv_modops = &mod_driverops,
4056 .drv_linkinfo = "AMD Zen Unified Memory Controller",
4057 .drv_dev_ops = &zen_umc_dev_ops
4058 };
4059
4060 static struct modlinkage zen_umc_modlinkage = {
4061 .ml_rev = MODREV_1,
4062 .ml_linkage = { &zen_umc_modldrv, NULL }
4063 };
4064
4065 int
4066 _init(void)
4067 {
4068 return (mod_install(&zen_umc_modlinkage));
4069 }
4070
4071 int
4072 _info(struct modinfo *modinfop)
4073 {
4074 return (mod_info(&zen_umc_modlinkage, modinfop));
4075 }
4076
4077 int
4078 _fini(void)
4079 {
4080 return (mod_remove(&zen_umc_modlinkage));
4081 }
4082