xref: /illumos-gate/usr/src/uts/intel/io/amdzen/zen_umc.c (revision 92279cb6e70fd12428e1d9e6270e7e2d877cbeec)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2025 Oxide Computer Company
14  */
15 
16 /*
17  * AMD Zen Unified Memory Controller Driver
18  *
19  * This file forms the core logic around transforming a physical address that
20  * we're used to using into a specific location on a DIMM. This has support for
21  * a wide range of AMD CPUs and APUs ranging from Zen 1 - Zen 5.
22  *
23  * The goal of this driver is to implement the infrastructure and support
24  * necessary to understand how DRAM requests are being routed in the system and
25  * to be able to map those to particular channels and then DIMMs. This is used
26  * as part of RAS (reliability, availability, and serviceability) to enable
27  * aspects around understanding ECC errors, hardware topology, and more. Like
28  * with any software project, there is more to do here. Please see the Future
29  * Work section at the end of this big theory statement for more information.
30  *
31  * -------------------
32  * Driver Organization
33  * -------------------
34  *
35  * This driver is organized into two major pieces:
36  *
37  *   1. Logic to interface with hardware, discover the data fabric, memory
38  *      controller configuration, and transform that into a normalized fashion
39  *      that can be used across all different Zen family CPUs. This is
40  *      implemented generally in this file, and is designed to assume it is in
41  *      the kernel (as it requires access to the SMN, DF PCI registers, and the
42  *      amdzen nexus driver client services).
43  *
44  *   2. Logic that can take the above normalized memory information and perform
45  *      decoding (e.g. physical address to DIMM information). This generally
46  *      lives in common/mc/zen_umc/zen_umc_decode.c. This file is in common/,
47  *      meaning it is designed to be shared by userland and the kernel. Even
48  *      more so, it is designed to operate on a const version of our primary
49  *      data structure (zen_umc_t), not allowing it to be modified. This allows
50  *      us to more easily unit test the decoding logic and utilize it in other
51  *      circumstances such as with the mcdecode utility.
52  *
53  * There is corresponding traditional dev_ops(9S) and cb_ops(9S) logic in the
54  * driver (currently this file) which take care of interfacing with the broader
55  * operating system environment.
56  *
57  * There is only ever one instance of this driver, i.e. it is a singleton in
58  * design pattern parlance. There is a single struct, the zen_umc_t found in the
59  * global (albeit static) variable zen_umc. This structure itself contains a
60  * hierarchical set of structures that describe the system. To make management
61  * of memory simpler, all of the nested structures that we discover from
62  * hardware are allocated in the same structure. The only exception to this rule
63  * is when we cache serialized nvlists for dumping.
64  *
65  * The organization of the structures inside the zen_umc_t generally mimics the
66  * hardware organization and is structured as follows:
67  *
68  *   +-----------+
69  *   | zen_umc_t |
70  *   +-----------+
71  *        |
72  *        +-------------------------------+
73  *        v                               v
74  *   +--------------+             +--------------+        One instance of the
75  *   | zen_umc_df_t |     ...     | zen_umc_df_t |        zen_umc_df_t per
76  *   +--------------+             +--------------+        discovered DF.
77  *     |||
78  *     |||
79  *     |||    +----------------+         +----------------+  Global DRAM
80  *     ||+--->| df_dram_rule_t |   ...   | df_dram_rule_t |  rules for the
81  *     ||     +----------------+         +----------------+  platform.
82  *     ||
83  *     ||    +--------------------+       +--------------------+  UMC remap
84  *     |+--->| zen_umc_cs_remap_t |  ...  | zen_umc_cs_remap_t |  rule arrays.
85  *     |     +--------------------+       +--------------------+
86  *     |
87  *     v
88  *    +----------------+         +----------------+   One structure per
89  *    | zen_umc_chan_t |   ...   | zen_umc_chan_t |   discovered DDR4/5
90  *    +----------------+         +----------------+   memory channel.
91  *     ||||
92  *     ||||
93  *     ||||    +----------------+       +----------------+   Channel specific
94  *     |||+--->| df_dram_rule_t |  ...  | df_dram_rule_t |   copy of DRAM rules.
95  *     |||     +----------------+       +----------------+   Less than global.
96  *     |||
97  *     |||     +---------------+       +---------------+   Per-Channel DRAM
98  *     ||+---->| chan_offset_t |  ...  | chan_offset_t |   offset that is used
99  *     ||      +---------------+       +---------------+   for normalization.
100  *     ||
101  *     ||      +-----------------+                         Channel-specific
102  *     |+----->| umc_chan_hash_t |                         hashing rules.
103  *     |       +-----------------+
104  *     |
105  *     |       +------------+         +------------+    One structure for
106  *     +------>| umc_dimm_t |   ...   | umc_dimm_t |    each DIMM in the
107  *             +------------+         +------------+    channel. Always two.
108  *                |
109  *                |     +----------+         +----------+   Per chip-select
110  *                +---> | umc_cs_t |   ...   | umc_cs_t |   data. Always two.
111  *                      +----------+         +----------+
112  *
113  * In the data structures themselves you'll often find several pieces of data
114  * that have the term 'raw' in their name. The point of these is to basically
115  * capture the original value that we read from the register before processing
116  * it. These are generally used either for debugging or to help answer future
117  * curiosity without resorting to the udf and usmn tooling, which hopefully aren't
118  * actually installed on systems.
119  *
120  * With the exception of some of the members in the zen_umc_t that are around
121  * management of state for userland ioctls, everything in the structure is
122  * basically write-once and from that point on should be treated as read-only.
123  *
124  * ---------------
125  * Memory Decoding
126  * ---------------
127  *
128  * To understand the process of memory decoding, it's worth going through and
129  * understanding a bunch of the terminology that is used in this process. As an
130  * additional reference when understanding this, you may want to turn to either
131  * an older generation AMD BIOS and Kernel Developer's Guide or the more current
132  * Processor Programming Reference. In addition, the imc driver, which is the
133  * Intel equivalent, also provides an additional bit of reference.
134  *
135  * SYSTEM ADDRESS
136  *
137  *	This is a physical address and is the way that the operating system
138  *	normally thinks of memory. System addresses can refer to many different
139  *	things. For example, you have traditional DRAM, memory-mapped PCIe
140  *	devices, peripherals that the processor exposes such as the xAPIC, data
141  *	from the FCH (Fusion Controller Hub), etc.
142  *
143  * TOM, TOM2, and the DRAM HOLE
144  *
145  *	Physical memory has a complicated layout on x86 in part because of
146  *	support for traditional 16-bit and 32-bit systems. As a result, contrary
147  *	to popular belief, DRAM is not at a consistent address range in the
148  *	processor. AMD processors have a few different ranges. There is a 32-bit
149  *	region that starts at effectively physical address zero and goes to the
150  *	TOM MSR (top of memory -- Core::X86::Msr::TOP_MEM). This indicates a
151  *	limit below 4 GiB, generally around 2 GiB.
152  *
153  *	From there, the next region of DRAM starts at 4 GiB and goes to TOM2
154  *	(top of memory 2 -- Core::X86::Msr::TOM2). The region between TOM and
155  *	4 GiB is called the DRAM hole. Physical addresses in this region are
156  *	used for memory mapped I/O. This breaks up contiguous physical
157  *	addresses being used for DRAM, creating a "hole".
158  *
159  * DATA FABRIC
160  *
161  *	The data fabric (DF) is the primary interface that different parts of
162  *	the system use to communicate with one another. This includes the I/O
163  *	engines (where PCIe traffic goes), CPU caches and their cores, memory
164  *	channels, cross-socket communication, and a whole lot more. The first
165  *	part of decoding addresses and figuring out which DRAM channel an
166  *	address should be directed to all come from the data fabric.
167  *
168  *	The data fabric is made up of instances. So there is one instance for
169  *	each group of cores, each memory channel, etc. Each instance has its own
170  *	independent set of register information. As the data fabric is a series
171  *	of devices exposed over PCI, if you do a normal PCI configuration space
172  *	read or write that'll end up broadcasting the I/O. Instead, to access a
173  *	particular instance's register information there is an indirect access
174  *	mechanism. The primary way that this driver accesses data fabric
175  *	registers is via these indirect reads.
176  *
177  *	There is one instance of the Data Fabric per socket starting with Zen 2.
178  *	In Zen 1, there was one instance of the data fabric per CCD -- core
179  *	complex die (see cpuid.c's big theory statement for more information).
180  *
181  * DF INSTANCE ID
182  *
183  *	A DF instance ID is an identifier for a single entity or component in a
184  *	data fabric.  The set of instance IDs is unique only within a single data
185  *	fabric. So for example, each memory channel, I/O endpoint (e.g. PCIe
186  *	logic), and group of cores has its own instance ID. Anything within the
187  *	same data fabric (e.g. the same die) can be reached via its instance ID.
188  *	The instance ID is used to indicate which instance to contact when
189  *	performing indirect accesses.
190  *
191  *	Not everything that has an instance ID will be globally routable (e.g.
192  *	between multiple sockets). For things that are, such as the memory
193  *	channels and coherent core initiators, there is a second ID called a
194  *	fabric ID.
195  *
196  * DF FABRIC ID
197  *
198  *	A DF fabric ID is an identifier that combines information to indicate
199  *	both which instance of the data fabric a component is on and a component
200  *	itself. So with this number you can distinguish between a memory channel
201  *	on one of two sockets. A Fabric ID is made up of two parts. The upper
202  *	part indicates which DF we are talking to and is referred to as a Node
203  *	ID. The Node ID is itself broken into two parts: one that identifies a
204  *	socket, and one that identifies a die. The lower part of a fabric ID is
205  *	called a component ID and indicates which component in a particular data
206  *	fabric that we are talking to. While only a subset of the total
207  *	components in the data fabric are routable, for everything that is, its
208  *	component ID matches its instance ID.
209  *
210  *	Put differently, the component portion of a fabric ID and a component's
211  *	instance ID are always the same for routable entities. For things which
212  *	cannot be routed, they only have an instance ID and no fabric ID.
213  *	Because this code is always interacting with data fabric components that
214  *	are routable, sometimes instance ID and the component ID portion of the
215  *	data fabric ID may be used interchangeably.
216  *
217  *	Finally, it's worth calling out that the number of bits that are used to
218  *	indicate the socket, die, and component in a fabric ID changes from
219  *	hardware generation to hardware generation.
220  *
221  *	Inside the code here, the socket and die decomposition information is
222  *	always relative to the node ID. AMD phrases the decomposition
223  *	information in terms of a series of masks and shifts. This is
224  *	information that can be retrieved from the data fabric itself, allowing
225  *	us to avoid hardcoding too much information other than which registers
226  *	actually have which fields. With both masks and shifts, it's important
227  *	to establish which comes first. We follow AMD's convention and always
228  *	apply masks before shifts. With that, let's look at an example of a
229  *	made up bit set:
230  *
231  *	Assumptions (to make this example simple):
232  *	  o The fabric ID is 16 bits
233  *	  o The component ID is 8 bits
234  *	  o The node ID is 8 bits
235  *	  o The socket and die ID are both 4 bits
236  *
237  *	Here, let's say that we have the ID 0x2106. This decomposes into a
238  *	socket 0x2, die 0x1, and component 0x6. Here is how that works in more
239  *	detail:
240  *
241  *	          0x21      0x06
242  *	        |------|  |------|
243  *	        Node ID   Component ID
244  *	Mask:    0xff00    0x00ff
245  *	Shift:   8         0
246  *
247  *	Next we would decompose the Node ID as:
248  *	         0x2        0x1
249  *	       |------|  |------|
250  *	       Sock ID    Die ID
251  *	Mask:   0xf0      0x0f
252  *	Shift:  4         0
253  *
254  *	Composing a fabric ID from its parts would work in a similar way by
255  *	applying masks and shifts.
256  *
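 *	To make this concrete, here is a minimal C sketch of the decomposition
 *	above. The df_decomp_t type and its values are illustrative stand-ins
 *	for this made up layout; the driver discovers the real masks and
 *	shifts from DF registers at runtime:
 *
 *	#include <stdint.h>
 *
 *	typedef struct {
 *		uint32_t node_mask;	// 0xff00: node bits in the fabric ID
 *		uint32_t node_shift;	// 8
 *		uint32_t comp_mask;	// 0x00ff: component bits (shift 0)
 *		uint32_t sock_mask;	// 0xf0: socket bits in the node ID
 *		uint32_t sock_shift;	// 4
 *		uint32_t die_mask;	// 0x0f: die bits in the node ID (shift 0)
 *	} df_decomp_t;
 *
 *	static void
 *	df_fabric_decompose(const df_decomp_t *d, uint32_t fid,
 *	    uint32_t *sockp, uint32_t *diep, uint32_t *compp)
 *	{
 *		// Per AMD convention, always mask before shifting.
 *		uint32_t node = (fid & d->node_mask) >> d->node_shift;
 *		*compp = fid & d->comp_mask;
 *		*sockp = (node & d->sock_mask) >> d->sock_shift;
 *		*diep = node & d->die_mask;
 *	}
 *
 *	With the layout above, decomposing 0x2106 yields socket 0x2, die 0x1,
 *	and component 0x6, matching the worked example.
 *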
257  * NORMAL ADDRESS
258  *
259  *	A normal address is one of the primary address types that AMD uses in
260  *	memory decoding. It takes into account the DRAM hole, interleave
261  *	settings, and is basically the address that is dispatched to the broader
262  *	data fabric towards a particular DRAM channel.
263  *
264  *	Often, phrases like 'normalizing the address' or normalization refer to
265  *	the process of transforming a system address into the channel address.
266  *
267  * INTERLEAVING
268  *
269  *	The idea of interleaving is to take a contiguous range and weave it
270  *	between multiple different actual entities. Generally certain bits in
271  *	the range are used to select one of several smaller regions. For
272  *	example, if you have 8 regions each that are 4 GiB in size, that creates
273  *	a single 32 GiB region. You can use three bits in that 32 GiB space to
274  *	select one of the 8 regions. For a more visual example, see the
275  *	definition of this in uts/intel/io/imc/imc.c.
276  *
277  * CHANNEL
278  *
279  *	A channel is used to refer to a single memory channel. This is sometimes
280  *	called a DRAM channel as well. A channel operates in a specific mode
281  *	based on the JEDEC DRAM standards (e.g. DDR4, LPDDR5, etc.). A
282  *	(LP)DDR4/5 channel may support up to two DIMMs inside the channel. The
283  *	number of slots is platform dependent and from there the number of DIMMs
284  *	installed can vary. Generally speaking, a DRAM channel defines a set
285  *	number of signals, most of which go to all DIMMs in the channel; what
286  *	varies is which "chip-select" is activated, which causes a given DIMM to
287  *	pay attention or not.
288  *
289  * DIMM
290  *
291  *	A DIMM refers to a physical hardware component that is installed into a
292  *	computer to provide access to dynamic memory. Originally this stood for
293  *	dual-inline memory module, though the DIMM itself has evolved beyond
294  *	that. A DIMM is organized into various pages, which are addressed by
295  *	a combination of rows, columns, banks, bank groups, and ranks. How this
296  *	fits together changes from generation to generation and is standardized
297  *	in something like DDR4, LPDDR4, DDR5, LPDDR5, etc. These standards
298  *	define the general individual modules that are assembled into a DIMM.
299  *	There are slightly different standards for combined memory modules
300  *	(which is what we use the term DIMM for). Examples of those include
301  *	things like registered DIMMs (RDIMMs).
302  *
303  *	A DDR4 DIMM contains a single channel that is 64 bits wide with 8 check
304  *	bits. A DDR5 DIMM has a notable change in this scheme from earlier DDR
305  *	standards. It breaks a single DDR5 DIMM into two sub-channels. Each
306  *	sub-channel is independently addressed and contains 32 bits of data and
307  *	8 bits of check data.
308  *
309  * ROW AND COLUMN
310  *
311  *	The most basic building block of a DIMM is a die. A DIMM consists of
312  *	multiple dies that are organized together (we'll discuss the
313  *	organization next). A given die is organized into a series of rows and
314  *	columns. First, one selects a row. At which point one is able to select
315  *	a specific column. It is more expensive to change rows than columns,
316  *	leading a given row to contain approximately 1 KiB of data spread across
317  *	its columns. The exact size depends on the device. Each row/column is a
318  *	series of capacitors and transistors. The transistor is used to select
319  *	data from the capacitor and the capacitor actually contains the logical
320  *	0/1 value.
321  *
322  * BANKS AND BANK GROUPS
323  *
324  *	An individual DRAM die is organized in something called a bank. A DIMM
325  *	has a number of banks that sit in series. These are then grouped into
326  *	larger bank groups. Generally speaking, each bank group has the same
327  *	number of banks. Let's take a look at an example of a system with 4
328  *	bank groups, each with 4 banks.
329  *
330  *         +-----------------------+           +-----------------------+
331  *         | Bank Group 0          |           | Bank Group 1          |
332  *         | +--------+ +--------+ |           | +--------+ +--------+ |
333  *         | | Bank 0 | | Bank 1 | |           | | Bank 0 | | Bank 1 | |
334  *         | +--------+ +--------+ |           | +--------+ +--------+ |
335  *         | +--------+ +--------+ |           | +--------+ +--------+ |
336  *         | | Bank 2 | | Bank 3 | |           | | Bank 2 | | Bank 3 | |
337  *         | +--------+ +--------+ |           | +--------+ +--------+ |
338  *         +-----------------------+           +-----------------------+
339  *
340  *         +-----------------------+           +-----------------------+
341  *         | Bank Group 2          |           | Bank Group 3          |
342  *         | +--------+ +--------+ |           | +--------+ +--------+ |
343  *         | | Bank 0 | | Bank 1 | |           | | Bank 0 | | Bank 1 | |
344  *         | +--------+ +--------+ |           | +--------+ +--------+ |
345  *         | +--------+ +--------+ |           | +--------+ +--------+ |
346  *         | | Bank 2 | | Bank 3 | |           | | Bank 2 | | Bank 3 | |
347  *         | +--------+ +--------+ |           | +--------+ +--------+ |
348  *         +-----------------------+           +-----------------------+
349  *
350  *	On a DIMM, only a single bank and bank group can be active at a time for
351  *	reading or writing an 8 byte chunk of data. However, these are still
352  *	pretty important and useful because of the time involved to switch
353  *	between them. It is much cheaper to switch between bank groups than
354  *	between banks and that time can be cheaper than activating a new row.
355  *	This allows memory controllers to pipeline this substantially.
356  *
357  * RANK AND CHIP-SELECT
358  *
359  *	The next level of organization is a rank. A rank is effectively an
360  *	independent copy of all the bank and bank groups on a DIMM. That is,
361  *	there are additional copies of the DIMM's organization, but not the data
362  *	itself. Originally a single or dual rank DIMM was built such that one
363  *	copy of everything was on each physical side of the DIMM. As the number
364  *	of ranks has increased this has changed as well. Generally speaking,
365  *	the contents of the rank
366  *	are equivalent. That is, you have the same number of bank groups, banks,
367  *	and each bank has the same number of rows and columns.
368  *
369  *	Ranks are selected by what's called a chip-select, often abbreviated as
370  *	CS_L in the various DRAM standards. AMD also often abbreviates this as a
371  *	CS (which is not to be confused with the DF class of device called a
372  *	CS). These signals are used to select a rank to activate on a DIMM.
373  *	There are some number of these for each DIMM which is how the memory
374  *	controller chooses which of the DIMMs it's actually going to activate in
375  *	the system.
376  *
377  *	One interesting gotcha here is how AMD organizes things. Each DIMM
378  *	logically is broken into two chip-selects in hardware. Between DIMMs
379  *	with more than 2 ranks and 3D stacked RDIMMs, there are ways to
380  *	potentially activate more bits. Ultimately these are mapped to a series
381  *	of rank multiplication logic internally. These ultimately then control
382  *	some of these extra pins, though the exact method isn't 100% clear at
383  *	this time.
384  *
385  * -----------------------
386  * Rough Hardware Process
387  * -----------------------
388  *
389  * To better understand how everything is implemented and structured, it's worth
390  * briefly describing what happens when hardware wants to read a given physical
391  * address. This is roughly summarized in the following chart. On the left hand
392  * side is the type of address, which is transformed and generally shrinks along
393  * the way. Next to it is the actor that is taking action and the type of
394  * address that it starts with.
395  *
396  * +---------+   +------+
397  * | Virtual |   | CPU  |
398  * | Address |   | Core |
399  * +---------+   +------+
400  *      |           |          The CPU core receives a memory request and then
401  *      |           * . . . .  determines whether this request is DRAM or MMIO
402  *      |           |          (memory-mapped I/O) and then sends it to the data
403  *      v           v          fabric.
404  * +----------+ +--------+
405  * | Physical | | Data   |
406  * | Address  | | Fabric |
407  * +----------+ +--------+
408  *      |           |          The data fabric instance in the CCX/D uses the
409  *      |           * . . . .  programmed DRAM rules to determine what DRAM
410  *      |           |          channel to direct a request to and what the
411  *      |           |          channel-relative address is. It then sends the
412  *      |           |          request through the fabric. Note, the number of
413  *      |           |          DRAM rules varies based on the processor SoC.
414  *      |           |          Server parts like Milan have many more rules than
415  *      |           |          an APU like Cezanne. The DRAM rules tell us both
416  *      v           v          how to find and normalize the physical address.
417  * +---------+  +---------+
418  * | Channel |  | DRAM    |
419  * | Address |  | Channel |
420  * +---------+  +---------+
421  *      |           |          The UMC (unified memory controller) receives the
422  *      |           * . . . .  DRAM request and determines which DIMM to send
423  *      |           |          the request to along with the rank, banks, row,
424  *      |           |          column, etc. It initiates a DRAM transaction and
425  *      |           |          then sends the results back through the data
426  *      v           v          fabric to the CPU core.
427  * +---------+  +--------+
428  * | DIMM    |  | Target |
429  * | Address |  | DIMM   |
430  * +---------+  +--------+
431  *
432  * The above is all generally done in hardware. There are multiple steps
433  * internal to this that we end up mimicking in software. This includes things
434  * like applying hashing logic, address transformations, and related.
435  * Thankfully the hardware is fairly generic and programmed with enough
436  * information, which we can pull out to figure this out. The rest of this theory
437  * statement covers the major parts of this: interleaving, the act of
438  * determining which memory channel to actually go to, and normalization, the
439  * act of removing some portion of the physical address bits to determine the
440  * address relative to a channel.
441  *
442  * ------------------------
443  * Data Fabric Interleaving
444  * ------------------------
445  *
446  * One of the major parts of address decoding is to understand how the
447  * interleaving features work in the data fabric. This is used to allow an
448  * address range to be spread out between multiple memory channels and then,
449  * later on, when normalizing the address. As mentioned above, a system address
450  * matches a rule which has information on interleaving. Interleaving comes in
451  * many different flavors. It can be used to just switch between channels,
452  * sockets, and dies. It can also end up involving some straightforward and some
453  * fairly complex hashing operations.
454  *
455  * Each DRAM rule has instructions on how to perform this interleaving. The way
456  * this works is that the rule first says to start at a given address bit,
457  * generally ranging from bit 8-12. These influence the granularity of the
458  * interleaving going on. From there, the rules determine how many bits to use
459  * from the address to determine the die, socket, and channel. In the simplest
460  * form, these perform a log2 of the actual number of things you're interleaving
461  * across (we'll come back to non-powers of two). So let's work a few common
462  * examples:
463  *
464  *   o 8-channel interleave, 1-die interleave, 2-socket interleave
465  *     Start at bit 9
466  *
467  *	In this case we have 3 bits that determine the channel to use, 0 bits
468  *	for the die, 1 bit for the socket. Here we would then use the following
469  *	bits to determine what the channel, die, and socket IDs are:
470  *
471  *	[12]    - Socket ID
472  *	[11:9]  - Channel ID
473  *
474  *	You'll note that there was no die-interleave, which means the die ID is
475  *	always zero. This is what you generally expect to see in Zen 2 and 3
476  *	based systems, as they only have one die, as well as on a Zen 1 APU.
477  *
478  *   o 2-channel interleave, 4-die interleave, 2-socket interleave
479  *     Start at bit 10
480  *
481  *	In this case we have 1 bit for the channel and socket interleave. We
482  *	have 2 bits for the die. This is something you might see on a Zen 1
483  *	system. This results in the following bits:
484  *
485  *      [13]    - Socket ID
486  *      [12:11] - Die ID
487  *      [10]    - Channel ID
488  *
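 *	To make the non-hashing extraction concrete, here is a hypothetical C
 *	helper (the names are illustrative, not the driver's). Channel bits
 *	are consumed first, then die bits, then socket bits:
 *
 *	#include <stdint.h>
 *
 *	static void
 *	df_simple_interleave(uint64_t pa, uint32_t start_bit,
 *	    uint32_t chan_nbits, uint32_t die_nbits, uint32_t sock_nbits,
 *	    uint32_t *chanp, uint32_t *diep, uint32_t *sockp)
 *	{
 *		uint64_t bits = pa >> start_bit;
 *
 *		// Channel bits sit lowest, then die, then socket.
 *		*chanp = (uint32_t)(bits & ((1U << chan_nbits) - 1));
 *		bits >>= chan_nbits;
 *		*diep = (uint32_t)(bits & ((1U << die_nbits) - 1));
 *		bits >>= die_nbits;
 *		*sockp = (uint32_t)(bits & ((1U << sock_nbits) - 1));
 *	}
 *
 *	For the first example (start 9, 3 channel bits, 0 die bits, 1 socket
 *	bit), this pulls the channel from [11:9] and the socket from [12].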
489  *
490  * COD, NPS, and MI3H HASHING
491  *
492  * However, this simple bit extraction isn't the only way to derive these. The
493  * other primary method is using a hash. While the exact hash methods vary
494  * between Zen 2/3 and Zen 4 based systems, they follow a general scheme. In the
495  * system there are three interleaving configurations that are either global or
496  * enabled on a per-rule basis. These indicate whether one should perform the
497  * XOR computation using addresses at:
498  *
499  *   o 64 KiB (starting at bit 16)
500  *   o 2 MiB (starting at bit 21)
501  *   o 1 GiB (starting at bit 30)
502  *
503  * In this world, you take the starting address bit defined by the rule and XOR
504  * it with each enabled interleave address. If you have more than one bit to
505  * select (e.g. because you are hashing across more than 2 channels), then you
506  * continue taking subsequent bits from each enabled region. So the second bit
507  * would use 17, 21, and 31 if all three ranges were enabled while the third bit
508  * would use 18, 22, and 32. While these are straightforward, there is a catch.
509  *
510  * While the DRAM rule contains what the starting address bit, you don't
511  * actually use subsequent bits in the same way. Instead subsequent bits are
512  * deterministic and use bits 12 and 13 from the address.  This is not the same
513  * consecutive thing that one might expect. Let's look at a Rome/Milan based
514  * example:
515  *
516  *   o 8-channel "COD" hashing, starting at address 9. All three ranges enabled.
517  *     1-die and 1-socket interleaving.
518  *
519  *      In this model we are using 3 bits for the channel, 0 bits for the socket
520  *      and die.
521  *
522  *	Channel ID[0] = addr[9]  ^ addr[16] ^ addr[21] ^ addr[30]
523  *	Channel ID[1] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31]
524  *	Channel ID[2] = addr[13] ^ addr[18] ^ addr[23] ^ addr[32]
525  *
526  *	So through this scheme we'd have a socket/die of 0, and then the channel
527  *	ID is computed based on that. The number of bits that we use here
528  *	depends on how many channels the hash is going across.
529  *
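 *	A small C sketch of this COD-style hash, assuming the Rome/Milan
 *	behavior described above (the first result bit comes from the rule's
 *	starting bit, subsequent ones from bits 12 and 13):
 *
 *	#include <stdint.h>
 *
 *	static inline uint32_t
 *	addr_bit(uint64_t pa, uint32_t b)
 *	{
 *		return ((uint32_t)(pa >> b) & 1);
 *	}
 *
 *	static uint32_t
 *	cod_chan_hash(uint64_t pa, uint32_t start, uint32_t nbits,
 *	    int use64k, int use2m, int use1g)
 *	{
 *		const uint32_t first[3] = { start, 12, 13 };
 *		uint32_t chan = 0;
 *
 *		for (uint32_t i = 0; i < nbits; i++) {
 *			uint32_t b = addr_bit(pa, first[i]);
 *
 *			// XOR in the i'th bit of each enabled range.
 *			if (use64k)
 *				b ^= addr_bit(pa, 16 + i);
 *			if (use2m)
 *				b ^= addr_bit(pa, 21 + i);
 *			if (use1g)
 *				b ^= addr_bit(pa, 30 + i);
 *			chan |= b << i;
 *		}
 *		return (chan);
 *	}
 *
 *	cod_chan_hash(pa, 9, 3, 1, 1, 1) reproduces the three Channel ID
 *	equations in the example above.
 *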
530  * The Genoa and related variants, termed "NPS", have a few wrinkles. First,
531  * rather than 3 bits being used for the channel, up to 4 bits are. Second,
532  * while the Rome/Milan "COD" hash above does not support socket or die
533  * interleaving, the "NPS" hash actually supports socket interleaving. However,
534  * unlike the straightforward non-hashing scheme, the first bit is used to
535  * determine the socket when enabled as opposed to the last one. In addition, if
536  * we're not performing socket interleaving, then we end up throwing address bit
537  * 14 into the mix here. Let's look at examples:
538  *
539  *   o 4-channel "NPS" hashing, starting at address 8. All three ranges enabled.
540  *     1-die and 1-socket interleaving.
541  *
542  *      In this model we are using 2 bits for the channel, 0 bits for the socket
543  *      and die. Because socket interleaving is not being used, bit 14 ends up
544  *      being added into the first bit of the channel selection. Presumably this
545  *      is to improve the address distribution in some form.
546  *
547  *      Channel ID[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30] ^ addr[14]
548  *      Channel ID[1] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31]
549  *
550  *   o 8-channel "NPS" hashing, starting at address 9. All three ranges enabled.
551  *     1-die and 2-socket interleaving.
552  *
553  *      In this model we are using 3 bits for the channel and 1 for the socket.
554  *      The die is always set to 0. Unlike the above, address bit 14 is not used
555  *      because it ends up being required for the 4th address bit.
556  *
557  *	Socket ID[0]  = addr[9]  ^ addr[16] ^ addr[21] ^ addr[30]
558  *	Channel ID[0] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31]
559  *	Channel ID[1] = addr[13] ^ addr[18] ^ addr[23] ^ addr[32]
560  *	Channel ID[2] = addr[14] ^ addr[19] ^ addr[24] ^ addr[33]
561  *
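 *	A hedged fragment for the NPS first-bit wrinkle, reusing addr_bit()
 *	from the COD sketch and assuming all three ranges are enabled:
 *
 *	static uint32_t
 *	nps_first_bit(uint64_t pa, uint32_t start, int sock_ileave)
 *	{
 *		uint32_t b = addr_bit(pa, start) ^ addr_bit(pa, 16) ^
 *		    addr_bit(pa, 21) ^ addr_bit(pa, 30);
 *
 *		// With socket interleaving, this bit becomes the socket ID;
 *		// without it, address bit 14 is folded in as well.
 *		if (!sock_ileave)
 *			b ^= addr_bit(pa, 14);
 *		return (b);
 *	}
 *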
562  * DF 4D2 NPS 1K/2K
563  *
564  * In our DF 4D2 variant, the interleave controls were changed and the way that
565  * hashes work is different. There are two main families here, a variant on the
566  * prior NPS hashing that is either NPS 1K or NPS 2K and the MI300 variant that
567  * we call MI3H. First, there are two additional address ranges that have been
568  * added:
569  *
570  *   o 4 KiB (starting at bit 12)
571  *   o 1 TiB (starting at bit 40)
572  *
573  * Of these, our understanding is that the 4 KiB range is only used for MI3H
574  * based hashing. When it is used, only bits 12-14 will be used, but that's
575  * because the hash algorithm for the MI3H series is, well, unique. The 1T
576  * otherwise works somewhat as normal. Currently we don't support the MI3H
577  * decoding, but know that it exists in the code so we can provide a better
578  * error code.
579  *
580  * The NPS 1K/2K hashes use a similar style. These are designed to support up to
581  * 32 channel hashes, which causes up to 5 bits to be used. The 5 bit form is
582  * only supported in the 1K variant. It starts at bit 8 (the nominally required
583  * starting interleave address) and then uses bit 9, before jumping up to bits
584  * 12-14 as required. The XOR addresses count up in a similar fashion. So the 64
585  * KiB interleave would use up to bits 16-20 in this scheme (corresponding to
586  * result bits 0-4).
587  *
588  * When the 2K form is used, only 4 bits are supported and the entire bit 9 row
589  * is ignored. This looks very similar to the NPS form; however, the gap is also
590  * there in the XOR bits and there is no longer the question of using bit 14 or
591  * not with socket interleaving. It is only ever used if we need the 5th channel
592  * bit. To see the difference let's look at two examples where the only
593  * difference between the two is whether we are using 1 or 2K hashing.
594  *
595  *   o 8-channel "NPS" 1K hashing, starting at address 8. 64 KiB, 2 MiB, 1 GiB,
596  *     and 1 TiB are enabled. 1-die and 1-socket.
597  *
598  *     In this model, there are always 3 bits for the channel. This means that
599  *     we will only use bits 8, 9, and 12 from the address to start with.
600  *
601  *      Channel ID[0] = addr[8]  ^ addr[16] ^ addr[21] ^ addr[30]
602  *      Channel ID[1] = addr[9]  ^ addr[17] ^ addr[22] ^ addr[31]
603  *      Channel ID[2] = addr[12] ^ addr[18] ^ addr[23] ^ addr[32]
604  *
605  *   o 8-channel "NPS" 2K hashing, starting at address 8. 64 KiB, 2 MiB, 1 GiB,
606  *     and 1 TiB are enabled. 1-die and 1-socket.
607  *
608  *     In this model, we also use 3 bits for the channel. However, we no longer
609  *     use bit 9, which is the 1K mode only. Similarly, you'll see that the bits
610  *     from the hash that would have been used for determining interleaving with
611  *     bit 9 are skipped entirely. This is why the 1K/2K variants are
612  *     incompatible with the original NPS hashing.
613  *
614  *      Channel ID[0] = addr[8]  ^ addr[16] ^ addr[21] ^ addr[30]
615  *      Channel ID[1] = addr[12] ^ addr[18] ^ addr[23] ^ addr[32]
616  *      Channel ID[2] = addr[13] ^ addr[19] ^ addr[24] ^ addr[33]
617  *
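 *	One hedged way to capture these sequences is a pair of lookup tables:
 *	result bit i is seeded from address bit npsXk_bits[i], with the XOR
 *	columns counting up alongside (and, for 2K, skipping the column that
 *	bit 9 would have used):
 *
 *	static const uint32_t nps1k_bits[5] = { 8, 9, 12, 13, 14 };
 *	static const uint32_t nps2k_bits[4] = { 8, 12, 13, 14 };
 *	// 1K: result bit i XORs in addr[16 + i], addr[21 + i], etc.
 *	// 2K: result bit 0 XORs in addr[16], but bits 1+ use addr[16 + i + 1]
 *	// and so on, mirroring the skipped bit 9 column above.
 *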
618  * ZEN 3 6-CHANNEL
619  *
620  * These were the simple cases. Things get more complex when we move to
621  * non-power of 2 based hashes between channels. There are two different sets of
622  * these schemes. The first of these is 6-channel hashing that was added in Zen
623  * 3. The second of these is a more complex and general form that was added in
624  * Zen 4. Let's start with the Zen 3 case. The Zen 3 6-channel hash requires
625  * starting at address bits 11 or 12 and varies its logic somewhat from there.
626  * In the 6-channel world, the socket and die interleaving must be disabled.
627  * Let's walk through an example:
628  *
629  *   o 6-channel Zen 3, starting at address 11. 2M and 1G range enabled.
630  *     1-die and 1-socket interleaving.
631  *
632  *      Regardless of the starting address, we will always use three bits to
633  *      determine a channel address. However, it's worth calling out that the
634  *      64K range is not considered for this at all. Another oddity is that when
635  *      calculating the hash bits the order of the extracted 2M and 1G addresses
636  *      is different.
637  *
638  *	This flow starts by calculating the three hash bits. This is defined
639  *	below. In the following, all bits marked with an '@' are ones that will
640  *	change when starting at address bit 12. In those cases the value will
641  *	increase by 1. Here's how we calculate the hash bits:
642  *
643  *      hash[0] = addr[11@] ^ addr[14@] ^ addr[23] ^ addr[32]
644  *      hash[1] = addr[12@] ^ addr[21]  ^ addr[30]
645  *      hash[2] = addr[13@] ^ addr[22]  ^ addr[31]
646  *
647  *      With this calculated, we always assign the first bit of the channel
648  *      based on the hash. The other bits are more complicated as we have to
649  *      deal with that gnarly power of two problem. We determine whether or not
650  *      to use the hash bits directly in the channel based on their value. If
651  *      they are not equal to 3, then we use them directly; otherwise we need
652  *      to go back to the physical address and take its modulus.
653  *      Basically:
654  *
655  *      Channel ID[0] = hash[0]
656  *      if (hash[2:1] == 3)
657  *		Channel ID[2:1] = (addr >> [11@+3]) % 3
658  *      else
659  *		Channel ID[2:1] = hash[2:1]
660  *
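 *	A hedged C sketch of this flow, with the starting bit (11 or 12)
 *	passed in so the '@' adjustment falls out naturally:
 *
 *	#include <stdint.h>
 *
 *	static uint32_t
 *	zen3_6ch_chan(uint64_t pa, uint32_t start)
 *	{
 *		uint32_t hash0, hash21;
 *
 *		hash0 = (uint32_t)((pa >> start) ^ (pa >> (start + 3)) ^
 *		    (pa >> 23) ^ (pa >> 32)) & 1;
 *		hash21 = ((uint32_t)((pa >> (start + 1)) ^ (pa >> 21) ^
 *		    (pa >> 30)) & 1) |
 *		    (((uint32_t)((pa >> (start + 2)) ^ (pa >> 22) ^
 *		    (pa >> 31)) & 1) << 1);
 *
 *		// hash[2:1] == 3 falls back to a modulus of the address.
 *		if (hash21 == 3)
 *			hash21 = (uint32_t)((pa >> (start + 3)) % 3);
 *		return ((hash21 << 1) | hash0);
 *	}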
661  *
662  * ZEN 4 NON-POWER OF 2
663  *
664  * I hope you like modulus calculations, because things get even more complex
665  * here now in Zen 4 which has many more modulus variations. These function in a
666  * similar way to the older 6-channel hash in Milan. They require one to start
667  * at address bit 8, they require that there is no die interleaving, and they
668  * support socket interleaving. The different channel arrangements end up in one
669  * of two sets of modulus values: a mod % 3 and a mod % 5 based on the number
670  * of channels used. Unlike the Milan form, all three address ranges (64 KiB, 2
671  * MiB, 1 GiB) are allowed to be used.
672  *
673  *   o 6-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled.
674  *     1-die and 2-socket interleaving.
675  *
676  *      We start by calculating the following set of hash bits regardless of
677  *      the number of channels that exist. The set of hash bits that is actually
678  *      used in various computations ends up varying based upon the number of
679  *      channels used. In 3-5 configs, only hash[0] is used. 6-10, both hash[0]
680  *      and hash[2] (yes, not hash[1]). The 12 channel config uses all three.
681  *
682  *      hash[0] = addr[8]  ^ addr[16] ^ addr[21] ^ addr[30] ^ addr[14]
683  *      hash[1] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31]
684  *      hash[2] = addr[13] ^ addr[18] ^ addr[23] ^ addr[32]
685  *
686  *      Unlike other schemes where bits directly map here, they instead are used
687  *      to seed the overall value. Depending on whether hash[0] is a 0 or 1, the
688  *      system goes through two different calculations entirely. Though all of
689  *      them end up involving the remainder of the system address going through
690  *      the modulus. In the following, a '3@' indicates the modulus value would
691  *      be swapped to 5 in a different scenario.
692  *
693  *      Channel ID = addr[63:14] % 3@
694  *      if (hash[0] == 1)
695  *		Channel ID = (Channel ID + 1) % 3@
696  *
697  *      Once this base for the channel ID has been calculated, additional
698  *      portions are added in. As this is the 6-channel form, we say:
699  *
700  *      Channel ID = Channel ID + (hash[2] * 3@)
701  *
702  *      Finally the socket is deterministic and always comes from hash[0].
703  *      Basically:
704  *
705  *      Socket ID = hash[0]
706  *
707  *   o 12-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled.
708  *     1-die and 1-socket interleaving.
709  *
710  *       This is a variant of the above. The hash is calculated the same way.
711  *       The base Channel ID is the same and if socket interleaving were enabled
712  *       it would also be hash[0]. What instead differs is how we use hash[1]
713  *       and hash[2]. The following logic is used instead of the final
714  *       calculation above.
715  *
716  *       Channel ID = Channel ID + (hash[2:1] * 3@)
717  *
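 *	A hedged C sketch of the mod-3 cases above, reusing addr_bit() from
 *	the COD sketch (the mod-5 family would swap the 3s for 5s):
 *
 *	static uint32_t
 *	zen4_np2_chan(uint64_t pa, uint32_t nchan)
 *	{
 *		uint32_t hash0 = addr_bit(pa, 8) ^ addr_bit(pa, 16) ^
 *		    addr_bit(pa, 21) ^ addr_bit(pa, 30) ^ addr_bit(pa, 14);
 *		uint32_t hash1 = addr_bit(pa, 12) ^ addr_bit(pa, 17) ^
 *		    addr_bit(pa, 22) ^ addr_bit(pa, 31);
 *		uint32_t hash2 = addr_bit(pa, 13) ^ addr_bit(pa, 18) ^
 *		    addr_bit(pa, 23) ^ addr_bit(pa, 32);
 *		uint32_t chan = (uint32_t)((pa >> 14) % 3);
 *
 *		if (hash0 == 1)
 *			chan = (chan + 1) % 3;
 *		if (nchan == 6)			// + hash[2] * 3
 *			chan += hash2 * 3;
 *		else if (nchan == 12)		// + hash[2:1] * 3
 *			chan += ((hash2 << 1) | hash1) * 3;
 *		return (chan);
 *	}
 *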
718  * NPS 1K/2K NON-POWER OF 2
719  *
720  * Just as the normal hashing changed with the introduction of the 1K/2K
721  * variants, so does the non-power of 2 hashing. This NP2 scheme is rather
722  * different than the base Zen 4 one. This uses the 64 KiB, 2 MiB, 1 GiB, and 1
723  * TiB ranges for hashing. Logically there are both 3 and 5 channel hashes again
724  * like Zen 4 and when socket interleaving is enabled, address bit 8 is always
725  * going to the socket.
726  *
727  * The 1K and 2K modes change which addresses are used and considered just like
728  * the non-NP2 case. The same interleave bit skipping for 2K still applies,
729  * meaning bit 9 will not be used for hashing and will instead be part of the
730  * normal address calculations that we have.
731  *
732  * Like in the Zen 4 case, we are going to be constructing our normalized
733  * address from three regions of bits. The low region which is everything that
734  * is used before the hashing, the bits skipped in the middle, and then the
735  * upper bits that have been untouched. These are not rearranged; rather, it's
736  * best to think of it as bits being removed, causing shifts and
737  * shrinks.
738  *
739  * Another important difference to call out before we get to examples is that
740  * each variant here uses a different address range as the upper portion to use.
741  * Unfortunately, whereas for Zen 4 we had some regular rules, each of these
742  * cases seems rather different. However, there is some general logic which is
743  * that in each case we calculate some modulus value from different addresses
744  * which we use to determine the channel, sometimes mixed with other hash bits.
745  * Then we calculate a new normalized address by taking the quotient as the high
746  * portion. Let's look at some examples here:
747  *
748  *   o 12 Channel 1K Zen 5, starting at address 8. 64K, 2M, 1G, and 1T ranges
749  *     enabled. 1-die and 1-socket interleaving.
750  *
751  *      This 12 channel mode is a modulus 3 case. This particular case needs two
752  *      hash bits. Because it is a 1K mode it uses bits 8 and 9. If we were in a
753  *      2K mode, we'd use bits 8 and 12. Bit 8 always also hashes in bit 14 just
754  *      like the Zen 4 case.
755  *
756  *      hash[0] = addr[8]  ^ addr[16] ^ addr[21] ^ addr[30] ^ addr[40] ^
757  *		  addr[14]
758  *      hash[1] = addr[9]  ^ addr[17] ^ addr[22] ^ addr[31] ^ addr[41]
759  *
760  *      Now that we have that, it's time to calculate the address we need to
761  *      take the modulus of to stick into the channel. For this particular case,
762  *      we construct an address as PA >> 12 | 0b00. In other words we take bits
763  *      [48+, 12] and move them to bit 2. Once we have that, we can go ahead and
764  *      construct the value modulus 3. Symbolically:
765  *
766  *      modAddr = (addr[64:12] & ~3) | 0b00 (or (addr >> 12) << 2)
767  *      modVal = modAddr % 3
768  *
769  *      Channel ID[0] = hash[0]
770  *      Channel ID[1] = hash[1]
771  *      Channel ID[2] = modVal[0]
772  *      Channel ID[3] = modVal[1]
773  *
774  *      In the 2K version we use (addr[64:13] & ~7) | 0b000 and hash[1] is based
775  *      on addr[12] rather than addr[9].
776  *
777  *   o 5 Channel 2K Zen 5, starting at address 8. 64K, 2M, 1G, and 1T ranges
778  *     enabled. 1-die and 1-socket interleaving.
779  *
780  *      With the 5-channel based mode we will now be working modulus five rather
781  *      than three. In this case, we have similar logic, except the way the
782  *      address is constructed to take the mod of is different. We can think of
783  *      this as:
784  *
785  *      modAddr = addr[64:12] | addr[8] | 0b0
786  *      modVal = modAddr % 5
787  *
788  *      Channel ID[0] = modVal[0]
789  *      Channel ID[1] = modVal[1]
790  *      Channel ID[2] = modVal[2]
791  *
792  *      Basically this ends up using a rather similar logical construction;
793  *      however, the values that it plugs in are different. Note that there was
794  *      no use of the hash in this case.
795  *
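 *	A hedged C sketch of the 12-channel 1K case above, reusing addr_bit()
 *	from the COD sketch (all four ranges assumed enabled):
 *
 *	static uint32_t
 *	nps1k_12ch_chan(uint64_t pa)
 *	{
 *		uint32_t hash0 = addr_bit(pa, 8) ^ addr_bit(pa, 16) ^
 *		    addr_bit(pa, 21) ^ addr_bit(pa, 30) ^ addr_bit(pa, 40) ^
 *		    addr_bit(pa, 14);
 *		uint32_t hash1 = addr_bit(pa, 9) ^ addr_bit(pa, 17) ^
 *		    addr_bit(pa, 22) ^ addr_bit(pa, 31) ^ addr_bit(pa, 41);
 *		// modAddr = (addr >> 12) << 2, then taken mod 3.
 *		uint32_t mod_val = (uint32_t)(((pa >> 12) << 2) % 3);
 *
 *		return (hash0 | (hash1 << 1) | (mod_val << 2));
 *	}
 *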
796  * POST BIT EXTRACTION
797  *
798  * Now, all of this was done to concoct a series of indexes. However,
799  * you'll note that a given DRAM rule actually already has a fabric target. So
800  * what do we do here? We add them together.
801  *
802  * The data fabric has registers that describe which bits in a fabric ID
803  * correspond to a socket, die, and channel. Taking the channel, die, and socket
804  * IDs above, one can construct a fabric ID. From there, we add the two data
805  * fabric IDs together and can then get to the fabric ID of the actual logical
806  * target. This is why all of the socket and die interleaving examples with no
807  * interleaving are OK to result in a zero. The idea here is that the base
808  * fabric ID in the DRAM rule will take care of indicating those other things as
809  * required.
810  *
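 *	A hedged sketch of the composition side, reusing the illustrative
 *	df_decomp_t layout from the fabric ID example earlier (real widths
 *	come from DF registers):
 *
 *	static uint32_t
 *	df_compose_fabric_id(const df_decomp_t *d, uint32_t sock,
 *	    uint32_t die, uint32_t chan)
 *	{
 *		uint32_t node = (sock << d->sock_shift) | die;
 *
 *		return ((node << d->node_shift) | chan);
 *	}
 *
 *	The interleave result is then added to the rule's destination, e.g.
 *	target = rule_base_fabric_id + df_compose_fabric_id(d, sock, die,
 *	chan), which is why all-zero socket and die results are harmless.
 *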
811  * You'll note the use of the term "logical target" up above. That's because
812  * some platforms have the ability to remap logical targets to physical targets
813  * (identified by the use of the ZEN_UMC_FAM_F_TARG_REMAP flag in the family
814  * data or the DF::DfCapability register once we're at the DF 4D2 variant). The
815  * way that remapping works changes based on the hardware generation.  This was
816  * first added in Milan (Zen 3) CPUs. In that model, you would use the socket
817  * and component information from the target ID to identify which remapping
818  * rules to use. On Genoa (Zen 4) CPUs, you would instead use information in the
819  * rule itself to determine which of the remap rule sets to use and then use
820  * the component ID to select which rewrite rule to use.
821  *
822  * Finally, there's one small wrinkle with this whole scheme that we haven't
823  * discussed: what actually is the address that we plug into this calculation.
824  * While you might think it actually is just the system address itself, that
825  * isn't actually always the case. Sometimes rather than using the address
826  * itself, it gets normalized based on the DRAM rule, which involves subtracting
827  * out the base address and potentially subtracting out the size of the DRAM
828  * hole (if the address is above the hole and hoisting is active for that
829  * range). Whether this is performed appears to be tied to the DF generation. The
830  * following table relates the DF generation to our behavior:
831  *
832  *   o DF 2 (Zen 1): Use the raw address
833  *   o DF 3 (Zen 2-3): Use the raw address if it's not a power of 2
834  *   o DF 3.5: Use the adjusted address
835  *   o DF 4 (Zen 4): Use the adjusted address
836  *   o DF 4D2 (Zen 4/5): Use the raw address
837  *
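 *	A hedged sketch of that decision table, using a hypothetical revision
 *	enum rather than the driver's actual types:
 *
 *	#include <stdbool.h>
 *
 *	typedef enum { DFR_2, DFR_3, DFR_3P5, DFR_4, DFR_4D2 } dfrev_t;
 *
 *	static bool
 *	df_use_adjusted_addr(dfrev_t rev, bool pow2_ileave)
 *	{
 *		switch (rev) {
 *		case DFR_2:
 *		case DFR_4D2:
 *			return (false);		// always the raw address
 *		case DFR_3:
 *			return (pow2_ileave);	// adjusted for power of 2
 *		case DFR_3P5:
 *		case DFR_4:
 *			return (true);		// always the adjusted address
 *		}
 *		return (false);
 *	}
 *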
838  * --------------------------------------------
839  * Data Fabric Interleave Address Normalization
840  * --------------------------------------------
841  *
842  * While you may have thought that we were actually done with the normalization
843  * fun in the last section, there's still a bit more here that we need to
844  * consider. In particular, there's a secondary transformation beyond
845  * interleaving that occurs as part of constructing the channel normalized
846  * address. Effectively, we need to account for all the bits that were used in
847  * the interleaving and generally speaking remove them from our normalized
848  * address.
849  *
850  * While this may sound weird on paper, the way to think about it is that
851  * interleaving at some granularity means that each device is grabbing the same
852  * set of addresses, the interleave just is used to direct it to its own
853  * location. When working with a channel normalized address, we're effectively
854  * creating a new region of addresses that have meaning within the DIMMs
855  * themselves. The channel doesn't care about what got it there, mainly just
856  * what it is now. So with that in mind, we need to discuss how we remove all
857  * the interleaving information in our different modes.
858  *
859  * Just to make sure it's clear, we are _removing_ all bits that were used for
860  * interleaving. This causes all bits above the removed ones to be shifted
861  * right.
862  *
863  * First, we have the case of standard power of 2 interleaving that applies to
864  * the 1, 2, 4, 8, 16, and 32 channel configurations. Here, we need to account
865  * for the total number of bits that are used for the channel, die, and socket
866  * interleaving and we simply remove all those bits starting from the starting
867  * address.
868  *
869  *   o 8-channel interleave, 1-die interleave, 2-socket interleave
870  *     Start at bit 9
871  *
872  *     If we look at this example, we are using 3 bits for the channel, 1 for
873  *     the socket, for a total of 4 bits. Because this is starting at bit 9,
874  *     this means that interleaving covers the bit range [12:9]. In this case
875  *     our new address would be (orig[63:13] >> 4) | orig[8:0].
876  *
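 *	This removal can be expressed with a small, hedged C helper that
 *	deletes nbits starting at bit 'start' and shifts everything above
 *	down; the hashed variants below can be built by applying it once per
 *	removed run, highest run first:
 *
 *	#include <stdint.h>
 *
 *	static uint64_t
 *	remove_bits(uint64_t pa, uint32_t start, uint32_t nbits)
 *	{
 *		uint64_t low = pa & ((1ULL << start) - 1);
 *
 *		// Bits at and above start + nbits slide down by nbits.
 *		return (((pa >> (start + nbits)) << start) | low);
 *	}
 *
 *	remove_bits(pa, 9, 4) implements the example above, deleting bits
 *	[12:9].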
877  *
878  * COD and NPS HASHING
879  *
880  * That was the simple case, next we have the COD/NPS hashing case that we need
881  * to consider. If we look at these, the way that they work is that they split
882  * which bits they use for determining the channel address and then hash others
883  * in. Here, we need to extract the starting address bit, then continue at bit
884  * 12 based on the number of bits in use and whether or not socket interleaving
885  * is at play for the NPS variant. Let's look at an example here:
886  *
887  *   o 8-channel "COD" hashing, starting at address 9. All three ranges enabled.
888  *     1-die and 1-socket interleaving.
889  *
890  *     Here we have three total bits being used. Because we start at bit 9, this
891  *     means we need to drop bits [13:12], [9]. So our new address would be:
892  *
893  *     orig[63:14] >> 3 | orig[11:10] >> 1 | orig[8:0]
894  *     |                  |                  +-> stays the same
895  *     |                  +-> relocated to bit 9 -- shifted by 1 because we
896  *     |                      removed bit 9.
897  *     +--> Relocated to bit 11 -- shifted by 3 because we removed bits 9, 12,
898  *          and 13.
899  *
900  *   o 8-channel "NPS" hashing, starting at address 8. All three ranges enabled.
901  *     1-die and 2-socket interleaving.
902  *
903  *     Here we need to remove bits [14:12], [8]. We're removing an extra bit
904  *     because we have 2-socket interleaving. This results in a new address of:
905  *
906  *     orig[63:15] >> 4 | orig[11:9] >> 1 | orig[7:0]
907  *     |                  |                 +-> stays the same
908  *     |                  +-> relocated to bit 8 -- shifted by 1 because we
909  *     |                      removed bit 8.
910  *     +--> Relocated to bit 11 -- shifted by 4 because we removed bits 8, 12,
911  *          13, and 14.
912  *
913  * NPS 1K/2K Hashing
914  *
915  * This case is a fairly straightforward variant on what we just discussed. In
916  * fact, 2K hashing looks just like what we've done before. The only difference
917  * with 1K hashing is that we'll consider bit 9 also for removal before we jump
918  * up to bit 12. Let's look at an example:
919  *
920  *   o 8-channel "NPS" 1K hashing, starting at address 8. All three ranges
921  *     enabled. 1-die and 2-socket interleaving.
922  *
923  *     Here we need to remove a total of 4 bits, which is now broken into
924  *     [13:12] and [9:8]. This results in a new address of:
925  *
926  *     orig[63:14] >> 4 | orig[11:10] >> 2 | orig[7:0]
927  *     |                  |                  +-> stays the same
928  *     |                  +-> relocated to bit 8 -- shifted by 2 because we
929  *     |                      removed bits 8 and 9.
930  *     +--> Relocated to bit 11 -- shifted by 4 because we removed bits 8, 9,
931  *          12, and 13.
932  *
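 *	In terms of the remove_bits() sketch earlier, this example is just
 *	two applications, dropping the higher run first so the lower indexes
 *	are undisturbed: remove_bits(remove_bits(pa, 12, 2), 8, 2).
 *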
933  * ZEN 3 6-CHANNEL
934  *
935  * Now, to the real fun stuff, our non-powers of two. First, let's start with
936  * our friend, the Zen 3 6-channel hash. So, the first thing that we need to do
937  * here is start by recomputing our hash again based on the current normalized
938  * address. Regardless of the hash value, this first removes all three bits from
939  * the starting address, so that's removing either [14:12] or [13:11].
940  *
941  * The rest of the normalization process here is quite complex and somewhat mind
942  * bending. Let's start working through an example here and build this up.
943  * First, let's assume that each channel has a single 16 GiB RDIMM. This would
944  * mean that the 6-channel interleave covers 96 GiB of RDIMM. However, by
945  * removing 3 bits worth, that technically corresponds to an 8-channel config
946  * that would normally suggest 128 GiB. The processor requires us to record
947  * this fact in the DF::Np2ChannelConfig register. The value that it wants is
948  * a bit weird. We believe it's calculated by the following:
949  *
950  *   1. Round the channel size up to the next power of 2.
951  *   2. Divide this total size by 64 KiB.
952  *   3. Determine the log base 2 that satisfies this value.
953  *
954  * In our particular example above, we have a 96 GiB channel, so for (1) we end
955  * up with 128 GiB (2^37). We now divide that by 64 KiB (2^16), so this becomes
956  * 2^(37 - 16) or 2^21. Because we want the log base 2 of the 2^21 from (2), this
957  * simply becomes 21. The DF::Np2ChannelConfig has two members, a 'space 0' and
958  * 'space 1'. Near as we can tell, in this mode only 'space 0' is used.
959  *
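 *	A hedged C sketch of that computation:
 *
 *	#include <stdint.h>
 *
 *	static uint32_t
 *	np2_space0(uint64_t chan_size)
 *	{
 *		uint32_t l2 = 0;
 *
 *		// Log2 of the size rounded up to a power of 2, minus
 *		// log2(64 KiB), i.e. 16.
 *		while ((1ULL << l2) < chan_size)
 *			l2++;
 *		return (l2 - 16);
 *	}
 *
 *	np2_space0(96ULL << 30) returns 37 - 16 = 21, matching the example.
 *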
960  * Before we get into the actual normalization scheme, we have to ask ourselves
961  * how we actually interleave data 6 ways. The scheme here is involved.
962  * First, it's important to remember that, like with other normalization
963  * schemes, we do adjust the address for the base address in the DRAM rule and
964  * then also take into account the DRAM hole if present.
965  *
966  * If we delete 3 bits, let's take a sample address and see where it would end
967  * up in the above scheme. We're going to take our 3 address bits and say that
968  * they start at bit 12, so this means that the bits removed are [14:12]. So the
969  * following are the 8 addresses that we have here and where they end up
970  * starting with 0x1ff:
971  *
972  *   o 0x01ff  -> 0x1ff, Channel 0 (hash 0b000)
973  *   o 0x11ff  -> 0x1ff, Channel 1 (hash 0b001)
974  *   o 0x21ff  -> 0x1ff, Channel 2 (hash 0b010)
975  *   o 0x31ff  -> 0x1ff, Channel 3 (hash 0b011)
976  *   o 0x41ff  -> 0x1ff, Channel 4 (hash 0b100)
977  *   o 0x51ff  -> 0x1ff, Channel 5 (hash 0b101)
978  *   o 0x61ff  -> 0x3000001ff, Channel 0 (hash 0b110)
979  *   o 0x71ff  -> 0x3000001ff, Channel 1 (hash 0b111)
980  *
981  * Yes, we did just jump to near the top of what is a 16 GiB DIMM's range for
982  * those last two. The way we determine when to do this jump is based on our
983  * hash. Effectively we ask what is hash[2:1]. If it is 0b11, then we need to
984  * do something different and enter this special case, basically jumping to the
985  * top of the range. If we think about a 6-channel configuration for a moment,
986  * the things that don't exist are the traditional 8-channel hash values 0b110
987  * and 0b111.
988  *
989  * If you go back to the interleave logic, this kind of meshes: it handled
990  * the cases of the hash being 0, 1, and 2 normally, and then did special things
991  * with the case of the hash being in this upper quadrant. The hash then
992  * determined where it went by shifting over the upper address and doing a mod
993  * 3 and using that to determine the upper two bits. With that weird address at
994  * the top of the range, let's go through and see what else actually goes to
995  * those weird addresses:
996  *
997  *   o 0x08000061ff -> 0x3000001ff, Channel 2 (hash 0b110)
998  *   o 0x08000071ff -> 0x3000001ff, Channel 3 (hash 0b111)
999  *   o 0x10000061ff -> 0x3000001ff, Channel 4 (hash 0b110)
1000  *   o 0x10000071ff -> 0x3000001ff, Channel 5 (hash 0b111)
1001  *
1002  * Based on the above you can see that we've split the 16 GiB DIMM into a 12 GiB
1003  * region (e.g. [ 0x0, 0x300000000 ), and a 4 GiB region [ 0x300000000,
1004  * 0x400000000 ). What seems to happen is that the CPU algorithmically is going
1005  * to put things in this upper range. To perform that action it goes back to the
1006  * register information that we stored in DF::Np2ChannelConfig. The way this
1007  * seems to be thought of is it wants to set the upper two bits of a 64 KiB
1008  * chunk (e.g. bits [15:14]) to 0b11 and then shift that over based on the DIMM
1009  * size.
1010  *
1011  * Our 16 GiB DIMM has 34 bits, so effectively we want to set bits [33:32] in
1012  * this case. The channel is 37 bits wide, which the CPU again knows as 2^21 *
1013  * 2^16. So it constructs the 64 KiB value of [15:14] = 0b11 and fills the rest
1014  * with zeros. It then multiplies it by 2^(21 - 3), or 2^18. The - 3 comes from
1015  * the fact that we removed 3 address bits. This, when added to the above, gets
1016  * us bits [33:32] = 0b11.
1017  *
1018  * While this appears to be the logic, I don't have a proof that this scheme
1019  * actually evenly covers the entire range, but a few examples appear to work
1020  * out.
1021  *
1022  * With this, the standard example flow that we give results in something like:
1023  *
1024  *   o 6-channel Zen 3, starting at address 11. 2M and 1G range enabled. Here,
1025  *     we assume that the value of the NP2 space0 is 21 bits. This example
1026  *     assumes we have 96 GiB total memory, which means rounding up to 128 GiB.
1027  *
1028  *     Step 1 here is to adjust our address to remove the three bits indicated.
1029  *     So we simply always set our new address to:
1030  *
1031  *     orig[63:14] >> 3 | orig[10:0]
1032  *     |                  +-> stays the same
1033  *     +--> Relocated to bit 11 because a 6-channel config always uses 3 bits to
1034  *          perform interleaving.
1035  *
1036  *     At this step, one would need to consult the hash of the normalized
1037  *     address before removing bits (but after adjusting for the base / DRAM
1038  *     hole). If hash[2:1] == 3, then we would say that the address is actually:
1039  *
1040  *     0b11 << 32 | orig[63:14] >> 3 | orig[10:0]
1041  *
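 *     As a hedged C sketch of the above (the function name is hypothetical
 *     and the constants assume this exact example: the interleave starts at
 *     bit 11 and the 16 GiB DIMM places the jump at bits [33:32]):
 *
 *         uint64_t
 *         zen3_np2_6ch_norm(uint64_t addr, uint8_t hash)
 *         {
 *                 uint64_t norm = ((addr >> 14) << 11) | (addr & 0x7ff);
 *
 *                 if (((hash >> 1) & 0x3) == 0x3)
 *                         norm |= 3ULL << 32;
 *                 return (norm);
 *         }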
1042  *
1043  * ZEN 4 NON-POWER OF 2
1044  *
1045  * Next, we have the DFv4 versions of the 3, 5, 6, 10, and 12 channel hashing.
1046  * An important part of this is whether or not there is any socket hashing going
1047  * on. Recall that if socket hashing was going on, then it is part of the
1048  * interleave logic; however, if it is not, then its hash actually becomes
1049  * part of the normalized address, but not in the same spot!
1050  *
1051  * In this mode, we always remove the bits that are actually used by the hash.
1052  * Recall that some modes use hash[0], others hash[0] and hash[2], and then only
1053  * the 12-channel config uses hash[2:0]. This means we need to be careful in how
1054  * we actually remove address bits. All other bits in this lower range we end up
1055  * keeping and using. The top bits, e.g. addr[63:14] are kept and divided by the
1056  * actual channel-modulus. If we're not performing socket interleaving and
1057  * therefore need to keep the value of hash[0], then it is appended as the least
1058  * significant bit of that calculation.
1059  *
1060  * Let's look at an example of this to try to make sense of it all.
1061  *
1062  *   o 6-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled.
1063  *     1-die and 2-socket interleaving.
1064  *
1065  *     Here we'd start by calculating hash[2:0] as described in the earlier
1066  *     interleaving situation. Because we're using a socket interleave, we will
1067  *     not opt to include hash[0] in the higher-level address calculation.
1068  *     Because this is a 6-channel calculation, our modulus is 3. Here, we will
1069  *     strip out bits 8 and 13 (recall in the interleaving 6-channel example we
1070  *     ignored hash[1], thus no bit 12 here). Our new address will be:
1071  *
1072  *     (orig[63:14] / 3) >> 2 | orig[12:9] >> 1 | orig[7:0]
1073  *      |                       |                 +-> stays the same
1074  *      |                       +-> relocated to bit 8 -- shifted by 1 because
1075  *      |                           we removed bit 8.
1076  *      +--> Relocated to bit 12 -- shifted by 2 because we removed bits 8 and
1077  *           13.
1078  *
1079  *   o 12-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled.
1080  *     1-die and 1-socket interleaving.
1081  *
1082  *     This is a slightly different case from the above in two ways. First, we
1083  *     will end up removing bits 8, 12, and 13, but then we'll also reuse
1084  *     hash[0]. Our new address will be:
1085  *
1086  *     ((orig[63:14] / 3) << 1 | hash[0]) >> 3 | orig[11:9] >> 1 | orig[7:0]
1087  *      |                                   |                      +-> stays the
1088  *      |                                   |                          same
1089  *      |                                   +-> relocated to bit 8 -- shifted by
1090  *      |                                       1 because we removed bit 8.
1091  *      +--> Relocated to bit 11 -- shifted by 3 because we removed bits 8, 12,
1092  *           and 13.
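 *
 *     As a hedged C sketch of this 12-channel example (the function name is
 *     hypothetical; hash0 is the hash[0] value calculated before the bits
 *     were removed):
 *
 *         uint64_t
 *         zen4_np2_12ch_norm(uint64_t addr, uint8_t hash0)
 *         {
 *                 uint64_t upper = ((addr >> 14) / 3) << 1 | hash0;
 *                 uint64_t mid = (addr >> 9) & 0x7;
 *
 *                 return ((upper << 11) | (mid << 8) | (addr & 0xff));
 *         }
 *
 *     The 6-channel case above has the same shape, except that hash[0] is
 *     dropped (the socket interleave consumed it) and the upper and middle
 *     pieces land at bits 12 and 8 respectively.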
1093  *
1094  * DF 4D2 NPS 1K/2K NON-POWER OF 2
1095  *
1096  * Unsurprisingly, if you've followed to this point, there is a slightly
1097  * different normalization scheme that is used here. Like in the other cases we
1098  * end up breaking the address into the three parts that are used: a lower
1099  * portion that remains the same, a middle portion that is from bits that were
1100  * not used as part of the interleaving process, and the upper portion which is
1101  * where we end up with our division (like the Zen 4 case above). To add to the
1102  * fun, the upper portion that gets divided sometimes has some lower parts of
1103  * the address tossed up there.
1104  *
1105  * Because each case is unique, we have created a data table in the decoder:
1106  * zen_umc_np2_k_rules. This structure has a number of pieces that describe how
1107  * to transform the address. Logically this computation looks like:
1108  *
1109  *   [ upper address / modulus ] | middle bits | low bits
1110  *     |                           |             |
1111  *     |                           |             +-> Always bits (rule start, 0]
1112  *     |                           |
1113  *     |                           +-> The starting bit is zukr_norm_addr. There
1114  *     |                               are zukr_norm_naddr bits. This is:
1115  *     |                               (zukr_norm_addr + zukr_norm_naddr,
1116  *     |                                zukr_norm_addr].
1117  *     |
1118  *     +--> This has two portions: everything from (64, zukr_high] and then the
1119  *          optional bonus region, which is indicated by zukr_div_addr and
1120  *          zukr_div_naddr. These bits always become the low bits, meaning the
1121  *          initial bits will be shifted over by zukr_div_naddr before we
1122  *          perform the division.
1123  *
1124  * Once each of these three pieces has been calculated, all the resulting pieces
1125  * will be shifted so they are contiguous like the other cases as though the
1126  * removed bits didn't exist.
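 *
 * In rough C terms, using the illumos bitx64(val, high, low) helper and
 * leaving the final contiguous-shifting step loose (it depends on how many
 * interleave bits a given rule removes), the three pieces are gathered
 * something like:
 *
 *     low = bitx64(addr, start - 1, 0);
 *     middle = bitx64(addr, zukr_norm_addr + zukr_norm_naddr - 1,
 *         zukr_norm_addr);
 *     div = (bitx64(addr, 63, zukr_high) << zukr_div_naddr) |
 *         bitx64(addr, zukr_div_addr + zukr_div_naddr - 1, zukr_div_addr);
 *     upper = div / modulus;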
1127  *
1128  *
1129  * That's most of the normalization process for the time being. We will have to
1130  * revisit this when we have to transform a normal address into a system address
1131  * and undo all this.
1132  *
1133  * -------------------------------------
1134  * Selecting a DIMM and UMC Organization
1135  * -------------------------------------
1136  *
1137  * One of the more nuanced things in decoding and encoding is the question of
1138  * where do we send a channel normalized address. That is, now that we've gotten
1139  * to a given channel, we need to transform the address into something
1140  * meaningful for a DIMM, and select a DIMM as well. The UMC SMN space contains
1141  * a number of Base Address and Mask registers which they describe as activating
1142  * a chip-select. A given UMC has up to four primary chip-selects (we'll come
1143  * back to DDR5 sub-channels later). The first two always go to the first DIMM
1144  * in the channel and the latter two always go to the second DIMM in the
1145  * channel. Put another way, you can always determine which DIMM you are
1146  * referring to by taking the chip-select and shifting it by 1.
1147  *
1148  * The UMC Channel registers are organized a bit differently in different
1149  * hardware generations. In a DDR5 based UMC, almost all of our settings are on
1150  * a per-chip-select basis, whereas in a DDR4 based system only the bases and
1151  * masks are. While gathering data we normalize this such that each logical
1152  * chip-select (umc_cs_t) that we have in the system has the same data so that
1153  * way DDR4 and DDR5 based systems are the same to the decoding logic. There is
1154  * also channel-wide data such as hash configurations and related.
1155  *
1156  * Each channel has a set of base and mask registers (and secondary ones as
1157  * well). To determine if we activate a given one, we first check if the
1158  * enabled bit is set. The enabled bit is set on a per-base basis, so both the
1159  * primary and secondary registers have separate enables. As there are four of
1160  * each base, mask, secondary base, and secondary mask, we say that if a
1161  * normalized address matches either a given index's primary or secondary
1162  * then it activates that given UMC index. The basic formula for an enabled
1163  * selection is:
1164  *
1165  *	NormAddr & ~Mask[i] == Base[i] & ~Mask[i]
1166  *
1167  * Once this is selected, this index in the UMC is what is always used to derive
1168  * the rest of the information that is specific to a given chip-select or DIMM.
1169  * An important thing to remember is that from this point onwards, while there
1170  * is a bunch of hashing and interleaving logic, it doesn't change which UMC
1171  * channel we read the data from, though the particular DIMM, rank, and address
1172  * we access will change as we go through hashing and interleaving.
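 *
 * A hedged sketch of that selection (the structure and field names here are
 * illustrative, not the driver's own):
 *
 *     for (uint_t i = 0; i < ncs; i++) {
 *             if (cs[i].base_en &&
 *                 (norm & ~cs[i].mask) == (cs[i].base & ~cs[i].mask))
 *                     return (i);
 *             if (cs[i].sec_en &&
 *                 (norm & ~cs[i].sec_mask) ==
 *                 (cs[i].sec_base & ~cs[i].sec_mask))
 *                     return (i);
 *     }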
1173  *
1174  * ------------------------
1175  * Row and Column Selection
1176  * ------------------------
1177  *
1178  * The number of bits that are used for the row and column address of a DIMM
1179  * varies based on the type of module itself. These depend on the density of a
1180  * DIMM module, e.g. how large an individual DRAM block is, a value such as 16
1181  * Gbit, and how wide each DRAM device is, which is generally phrased as X4,
1182  * X8, and X16. The memory controller encodes the number of bits (derived from
1183  * the DIMM's SPD data) and then determines which bits are used for addresses.
1184  *
1185  * Based on this information we can initially construct a row and a column
1186  * address by leveraging the information about the number of bits and then
1187  * extracting the correct bits out of the normalized channel address.
1188  *
1189  * If you've made it this far, you know nothing is quite this simple, despite it
1190  * seeming so. Importantly, not all DIMMs actually have storage that is a power
1191  * of 2. As such, there's another bit that we have to consult to transform the
1192  * actual value that we have for a row; remarkably, the column somehow has no
1193  * transformations applied to it.
1194  *
1195  * The hardware gives us information on inverting the two 'most significant
1196  * bits' of the row address which we store in 'ucs_inv_msbs'. First, we have the
1197  * question of what are our most significant bits here. This is basically
1198  * determined by the number of low and high row bits. In this case higher
1199  * actually is what we want. Note, the high row bits only exist in DDR4. Next,
1200  * we need to know whether we used the primary or secondary base/mask pair for
1201  * this as there are separate primary and secondary inversion bits. The higher
1202  * bit of the inversion register (e.g. ucs_inv_msbs[1]) corresponds to the
1203  * highest row bit. A zero in the bit position indicates that we should not
1204  * perform an inversion, whereas a one says that we should invert this.
1205  *
1206  * To actually make this happen we can take advantage of the fact that the
1207  * meaning of a 0/1 above means that this can be implemented with a binary
1208  * exclusive-OR (XOR). Logically speaking if we have a don't invert setting
1209  * present, a 0, then x ^ 0 is always x. However, if we have a 1 present, then
1210  * we know that (for a single bit) x ^ 1 = ~x. We take advantage of this fact in
1211  * the row logic.
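 *
 * A hedged sketch, where inv is whichever of the primary or secondary
 * inversion values applies and msb1 > msb0 are the positions of the two most
 * significant row bits (the names here are illustrative):
 *
 *     row ^= ((inv >> 1) & 1) << msb1;
 *     row ^= (inv & 1) << msb0;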
1212  *
1213  * ---------------------
1214  * Banks and Bank Groups
1215  * ---------------------
1216  *
1217  * While addressing within a given module is done by the use of a row and column
1218  * address, to increase storage density a module generally has a number of
1219  * banks, which may be organized into one or more bank groups. While a given
1220  * DDR4/5 access happens in some prefetched chunk of say 64 bytes (what do you
1221  * know, that's a cacheline), that all occurs within a single bank. The addition
1222  * of bank groups makes it easier to access data in parallel -- it is often
1223  * faster to read from another bank group than to read another region inside a
1224  * bank group.
1225  *
1226  * Based on the DIMMs internal configuration, there will be a specified number
1227  * of bits used for the overall bank address (including bank group bits)
1228  * followed by a number of bits actually used for bank groups. There is,
1229  * separately, an array of bits used to concoct the actual address. It appears,
1230  * mostly through experimental evidence, that the bank group bits occur first
1231  * and then are followed by the bank selection itself.  This makes some sense if
1232  * you assume that switching bank groups is faster than switching banks.
1233  *
1234  * So if we see the UMC noting 4 bank bits and 2 bank group bits, that means
1235  * that the umc_cs_t's ucs_bank_bits[1:0] correspond to bank_group[1:0] and
1236  * ucs_bank_bits[3:2] correspond to bank_address[1:0]. However, if there were no
1237  * bank group bits indicated, then all of the address bits would correspond to the
1238  * bank address.
1239  *
1240  * Now, this would all be straightforward if not for hashing, our favorite.
1241  * There are five bank hashing registers per channel (UMC_BANK_HASH_DDR4,
1242  * UMC_BANK_HASH_DDR5), one for each of the five possible bank bits. To
1243  * do this we need to use the calculated row and column that we previously
1244  * determined. This calculation happens in a few steps:
1245  *
1246  *   1) First check if the enable bit is set in the rule. If not, just use the
1247  *      normal bank address bit and we're done.
1248  *   2) Take a bitwise-AND of the calculated row and hash register's row value.
1249  *      Next do the same thing for the column.
1250  *   3) For each bit in the row, progressively XOR it, e.g. row[0] ^ row[1] ^
1251  *      row[2] ^ ... to calculate a net bit value for the row. This then
1252  *      repeats itself for the column. What basically has happened is that we're
1253  *      using the hash register to select which bits to impact our decision.
1254  *      Think of this as a traditional bitwise functional reduce.
1255  *   4) XOR the combined row bit with the column bit and the actual bank
1256  *      address bit from the normalized address. So if this were bank bit 0,
1257  *      which indicated we should use bit 15 for bank[0], then we would
1258  *      ultimately say our new bit is norm_addr[15] ^ row_xor ^ col_xor
1259  *
1260  * An important caveat is that we would only consult all this if we actually
1261  * were told that the bank bit was being used. For example if we had 3 bank
1262  * bits, then we'd only check the first 3 hash registers. The latter two would
1263  * be ignored.
1264  *
1265  * Once this process is done, then we can go back and split the activated bank
1266  * into the actual bank used and the bank group used based on the first bits
1267  * going to the bank group.
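 *
 * A hedged sketch of steps 1-4 for a single bank bit, where parity64() stands
 * in for the running XOR reduce and bank_hash_t is an illustrative stand-in
 * for the decoded hash register contents:
 *
 *     uint_t
 *     bank_bit(uint_t norm_bit, uint32_t row, uint32_t col,
 *         const bank_hash_t *h)
 *     {
 *             if (!h->enable)
 *                     return (norm_bit);
 *             return (norm_bit ^ parity64(row & h->row) ^
 *                 parity64(col & h->col));
 *     }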
1268  *
1269  * ----------------
1270  * DDR5 Sub-channel
1271  * ----------------
1272  *
1273  * As described in the definitions section, DDR5 has the notion of a
1274  * sub-channel. Here, a single bit is used to determine which of the
1275  * sub-channels to actually operate and utilize. Importantly the same
1276  * chip-select seems to apply to both halves of a given sub-channel.
1277  *
1278  * There is also a hash that is used here. The hash here utilizes the calculated
1279  * bank, column, and row and follows the same pattern used in the bank
1280  * calculation where we do a bunch of running exclusive-ORs and then do that
1281  * with the original value we found to get the new value. Because there's only
1282  * one bit for the sub-channel, we only have a single hash to consider.
1283  *
1284  * -------------------------------------------
1285  * Ranks, Chip-Select, and Rank Multiplication
1286  * -------------------------------------------
1287  *
1288  * The notion of ranks and the chip-select are interwoven. From a strict DDR4
1289  * RDIMM perspective, there are two lines that are dedicated for chip-selects
1290  * and then another two that are shared with three 'chip-id' bits that are used
1291  * in 3DS RDIMMs. In all cases the controller starts with two logical chip
1292  * selects and then uses something called rank multiplication to figure out how
1293  * to multiplex that and map to the broader set of things. Basically, in
1294  * reality, DDR4 RDIMMs allow for 4 bits to determine a rank and then 3DS RDIMMs
1295  * use 2 bits for a rank and 3 bits to select a stacked chip. In DDR5 this is
1296  * different and you just have 2 bits for a rank.
1297  *
1298  * It's not entirely clear from what we know from AMD, but it seems that we use
1299  * the RM bits as a way to basically go beyond the basic 2 bits of chip-select
1300  * which is determined based on which channel we logically activate. Initially
1301  * we treat this as two distinct things, here as that's what we get from the
1302  * hardware. There are two hashes here: a chip-select and a rank-multiplication
1303  * hash. Unlike the others, which rely on the bank, row, and column addresses,
1304  * this hash relies on the normalized address. So we calculate that mask and do
1305  * our same xor dance.
1306  *
1307  * There is one hash for each rank multiplication bit and chip-select bit. The
1308  * number of rank multiplication bits is given to us. The number of chip-select
1309  * bits is fixed, it's simply two because there are four base/mask registers and
1310  * logical chip-selects in a given UMC channel. The chip-select on some DDR5
1311  * platforms has a secondary exclusive-OR hash that can be applied. As this only
1312  * exists in some families, for any where it does not exist, we seed it to be
1313  * zero so that it becomes a no-op.
1314  *
1315  * -----------
1316  * Future Work
1317  * -----------
1318  *
1319  * As the road goes ever on and on, down from the door where it began, there are
1320  * still some stops on the journey for this driver. In particular, here are the
1321  * major open areas that could be implemented to extend what this can do:
1322  *
1323  *   o The ability to transform a normalized channel address back to a system
1324  *     address. This is required for MCA/MCA-X error handling as those generally
1325  *     work in terms of channel addresses.
1326  *   o Integrating with the MCA/MCA-X error handling paths so that way we can
1327  *     take correct action in the face of ECC errors and allowing recovery from
1328  *     uncorrectable errors.
1329  *   o Providing memory controller information to FMA so that way it can opt to
1330  *     do predictive failure or give us more information about what is at fault
1331  *     with ECC errors.
1332  *   o Figuring out if we will get MCEs for privileged address decoding and if
1333  *     so mapping those back to system addresses and related.
1334  *   o 3DS RDIMMs likely will need a little bit of work to ensure we're handling
1335  *     the resulting combination of the RM bits and CS and reporting it
1336  *     intelligently.
1337  *   o Support for the MI300-specific interleave decoding.
1338  *   o Understanding the error flow for CXL related address decoding and if we
1339  *     should support it in this driver.
1340  */
1341 
1342 #include <sys/types.h>
1343 #include <sys/file.h>
1344 #include <sys/errno.h>
1345 #include <sys/open.h>
1346 #include <sys/cred.h>
1347 #include <sys/ddi.h>
1348 #include <sys/sunddi.h>
1349 #include <sys/stat.h>
1350 #include <sys/conf.h>
1351 #include <sys/devops.h>
1352 #include <sys/cmn_err.h>
1353 #include <sys/x86_archext.h>
1354 #include <sys/sysmacros.h>
1355 #include <sys/mc.h>
1356 
1357 #include <zen_umc.h>
1358 #include <sys/amdzen/df.h>
1359 #include <sys/amdzen/umc.h>
1360 
1361 static zen_umc_t *zen_umc;
1362 
1363 /*
1364  * Per-CPU family information that describes the set of capabilities that they
1365  * implement. When adding support for new CPU generations, you must go through
1366  * what documentation you have and validate these. The best bet is to find a
1367  * similar processor and see what has changed. Unfortunately, there really isn't
1368  * a substitute for just basically checking every register. The family name
1369  * comes from amdzen_c_family(). One additional note for new CPUs: if our
1370  * parent amdzen nexus driver does not attach (because the DF has changed PCI
1371  * IDs or more), then just adding something here will not be sufficient to make
1372  * it work.
1373  */
1374 static const zen_umc_fam_data_t zen_umc_fam_data[] = {
1375 	{
1376 		.zufd_family = X86_PF_AMD_NAPLES,
1377 		.zufd_dram_nrules = 16,
1378 		.zufd_cs_nrules = 2,
1379 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR4,
1380 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS
1381 	}, {
1382 		.zufd_family = X86_PF_HYGON_DHYANA,
1383 		.zufd_dram_nrules = 16,
1384 		.zufd_cs_nrules = 2,
1385 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR4,
1386 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS
1387 	}, {
1388 		.zufd_family = X86_PF_AMD_DALI,
1389 		.zufd_dram_nrules = 2,
1390 		.zufd_cs_nrules = 2,
1391 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR4_APU,
1392 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS
1393 	}, {
1394 		.zufd_family = X86_PF_AMD_ROME,
1395 		.zufd_flags = ZEN_UMC_FAM_F_NP2 | ZEN_UMC_FAM_F_NORM_HASH |
1396 		    ZEN_UMC_FAM_F_UMC_HASH,
1397 		.zufd_dram_nrules = 16,
1398 		.zufd_cs_nrules = 2,
1399 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR4,
1400 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1401 		    UMC_CHAN_HASH_F_CS
1402 	}, {
1403 		.zufd_family = X86_PF_AMD_RENOIR,
1404 		.zufd_flags = ZEN_UMC_FAM_F_NORM_HASH,
1405 		.zufd_dram_nrules = 2,
1406 		.zufd_cs_nrules = 2,
1407 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR4_APU,
1408 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_PC |
1409 		    UMC_CHAN_HASH_F_CS
1410 	}, {
1411 		.zufd_family = X86_PF_AMD_MATISSE,
1412 		.zufd_flags = ZEN_UMC_FAM_F_NORM_HASH | ZEN_UMC_FAM_F_UMC_HASH,
1413 		.zufd_dram_nrules = 16,
1414 		.zufd_cs_nrules = 2,
1415 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR4,
1416 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1417 		    UMC_CHAN_HASH_F_CS
1418 	}, {
1419 		.zufd_family = X86_PF_AMD_VAN_GOGH,
1420 		.zufd_flags = ZEN_UMC_FAM_F_NORM_HASH,
1421 		.zufd_dram_nrules = 2,
1422 		.zufd_cs_nrules = 2,
1423 		.zufd_umc_style = ZEN_UMC_UMC_S_HYBRID_LPDDR5,
1424 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS
1425 	}, {
1426 		.zufd_family = X86_PF_AMD_MENDOCINO,
1427 		.zufd_flags = ZEN_UMC_FAM_F_NORM_HASH,
1428 		.zufd_dram_nrules = 2,
1429 		.zufd_cs_nrules = 2,
1430 		.zufd_umc_style = ZEN_UMC_UMC_S_HYBRID_LPDDR5,
1431 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS
1432 	}, {
1433 		.zufd_family = X86_PF_AMD_MILAN,
1434 		.zufd_flags = ZEN_UMC_FAM_F_TARG_REMAP | ZEN_UMC_FAM_F_NP2 |
1435 		    ZEN_UMC_FAM_F_NORM_HASH | ZEN_UMC_FAM_F_UMC_HASH,
1436 		.zufd_dram_nrules = 16,
1437 		.zufd_cs_nrules = 2,
1438 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR4,
1439 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1440 		    UMC_CHAN_HASH_F_CS
1441 	}, {
1442 		.zufd_family = X86_PF_AMD_GENOA,
1443 		.zufd_flags = ZEN_UMC_FAM_F_TARG_REMAP |
1444 		    ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR |
1445 		    ZEN_UMC_FAM_F_CS_XOR,
1446 		.zufd_dram_nrules = 20,
1447 		.zufd_cs_nrules = 4,
1448 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR5,
1449 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1450 		    UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS
1451 	}, {
1452 		.zufd_family = X86_PF_AMD_VERMEER,
1453 		.zufd_flags = ZEN_UMC_FAM_F_NORM_HASH | ZEN_UMC_FAM_F_UMC_HASH,
1454 		.zufd_dram_nrules = 16,
1455 		.zufd_cs_nrules = 2,
1456 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR4,
1457 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1458 		    UMC_CHAN_HASH_F_CS,
1459 	}, {
1460 		.zufd_family = X86_PF_AMD_REMBRANDT,
1461 		.zufd_flags = ZEN_UMC_FAM_F_NORM_HASH,
1462 		.zufd_dram_nrules = 2,
1463 		.zufd_cs_nrules = 2,
1464 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU,
1465 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS
1466 	}, {
1467 		.zufd_family = X86_PF_AMD_CEZANNE,
1468 		.zufd_flags = ZEN_UMC_FAM_F_NORM_HASH,
1469 		.zufd_dram_nrules = 2,
1470 		.zufd_cs_nrules = 2,
1471 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR4_APU,
1472 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_PC |
1473 		    UMC_CHAN_HASH_F_CS
1474 	}, {
1475 		.zufd_family = X86_PF_AMD_RAPHAEL,
1476 		.zufd_flags = ZEN_UMC_FAM_F_CS_XOR,
1477 		.zufd_dram_nrules = 2,
1478 		.zufd_cs_nrules = 2,
1479 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR5,
1480 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1481 		    UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS
1482 	}, {
1483 		.zufd_family = X86_PF_AMD_BERGAMO,
1484 		.zufd_flags = ZEN_UMC_FAM_F_TARG_REMAP |
1485 		    ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR |
1486 		    ZEN_UMC_FAM_F_CS_XOR,
1487 		.zufd_dram_nrules = 20,
1488 		.zufd_cs_nrules = 4,
1489 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR5,
1490 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1491 		    UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS
1492 	}, {
1493 		.zufd_family = X86_PF_AMD_PHOENIX,
1494 		.zufd_flags = ZEN_UMC_FAM_F_CS_XOR,
1495 		.zufd_dram_nrules = 2,
1496 		.zufd_cs_nrules = 2,
1497 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU,
1498 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS
1499 	}, {
1500 		.zufd_family = X86_PF_AMD_STRIX,
1501 		.zufd_flags = ZEN_UMC_FAM_F_CS_XOR,
1502 		.zufd_dram_nrules = 2,
1503 		.zufd_cs_nrules = 2,
1504 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU,
1505 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS
1506 	}, {
1507 		.zufd_family = X86_PF_AMD_KRACKAN,
1508 		.zufd_flags = ZEN_UMC_FAM_F_CS_XOR,
1509 		.zufd_dram_nrules = 2,
1510 		.zufd_cs_nrules = 2,
1511 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU,
1512 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS
1513 	}, {
1514 		.zufd_family = X86_PF_AMD_STRIX_HALO,
1515 		.zufd_flags = ZEN_UMC_FAM_F_CS_XOR,
1516 		.zufd_dram_nrules = 3,
1517 		.zufd_cs_nrules = 3,
1518 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU,
1519 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS
1520 	}, {
1521 		.zufd_family = X86_PF_AMD_GRANITE_RIDGE,
1522 		.zufd_flags = ZEN_UMC_FAM_F_CS_XOR,
1523 		.zufd_dram_nrules = 2,
1524 		.zufd_cs_nrules = 2,
1525 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR5,
1526 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1527 		    UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS
1528 	}, {
1529 		.zufd_family = X86_PF_AMD_TURIN,
1530 		.zufd_flags = ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR |
1531 		    ZEN_UMC_FAM_F_CS_XOR,
1532 		.zufd_dram_nrules = 20,
1533 		.zufd_cs_nrules = 4,
1534 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR5,
1535 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1536 		    UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS
1537 	}, {
1538 		.zufd_family = X86_PF_AMD_DENSE_TURIN,
1539 		.zufd_flags = ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR |
1540 		    ZEN_UMC_FAM_F_CS_XOR,
1541 		.zufd_dram_nrules = 20,
1542 		.zufd_cs_nrules = 4,
1543 		.zufd_umc_style = ZEN_UMC_UMC_S_DDR5,
1544 		.zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM |
1545 		    UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS
1546 	}
1547 };
1548 
1549 /*
1550  * We use this for the DDR4 and Hybrid DDR4 + LPDDR5 tables to map between the
1551  * specific enumerated speeds which are encoded values and the corresponding
1552  * memory clock and speed. For all DDR4 and LPDDR5 items we assume a 1:2 ratio
1553  * between them. This is not used for the pure DDR5 / LPDDR5 entries because of
1554  * how the register just encodes the raw value in MHz.
1555  */
1556 typedef struct zen_umc_freq_map {
1557 	uint32_t zufm_reg;
1558 	uint32_t zufm_mhz;
1559 	uint32_t zufm_mts2;
1560 	uint32_t zufm_mts4;
1561 } zen_umc_freq_map_t;
1562 
1563 static const zen_umc_freq_map_t zen_umc_ddr4_map[] = {
1564 	{ UMC_DRAMCFG_DDR4_MEMCLK_667, 667, 1333, 0 },
1565 	{ UMC_DRAMCFG_DDR4_MEMCLK_800, 800, 1600, 0 },
1566 	{ UMC_DRAMCFG_DDR4_MEMCLK_933, 933, 1866, 0 },
1567 	{ UMC_DRAMCFG_DDR4_MEMCLK_1067, 1067, 2133, 0 },
1568 	{ UMC_DRAMCFG_DDR4_MEMCLK_1200, 1200, 2400, 0 },
1569 	{ UMC_DRAMCFG_DDR4_MEMCLK_1333, 1333, 2666, 0 },
1570 	{ UMC_DRAMCFG_DDR4_MEMCLK_1467, 1467, 2933, 0 },
1571 	{ UMC_DRAMCFG_DDR4_MEMCLK_1600, 1600, 3200, 0 }
1572 };
1573 
1574 static const zen_umc_freq_map_t zen_umc_lpddr5_map[] = {
1575 	{ UMC_DRAMCFG_HYB_MEMCLK_333, 333, 667, 1333 },
1576 	{ UMC_DRAMCFG_HYB_MEMCLK_400, 400, 800, 1600 },
1577 	{ UMC_DRAMCFG_HYB_MEMCLK_533, 533, 1066, 2133 },
1578 	{ UMC_DRAMCFG_HYB_MEMCLK_687, 687, 1375, 2750 },
1579 	{ UMC_DRAMCFG_HYB_MEMCLK_750, 750, 1500, 3000 },
1580 	{ UMC_DRAMCFG_HYB_MEMCLK_800, 800, 1600, 3200 },
1581 	{ UMC_DRAMCFG_HYB_MEMCLK_933, 933, 1866, 3733 },
1582 	{ UMC_DRAMCFG_HYB_MEMCLK_1066, 1066, 2133, 4267 },
1583 	{ UMC_DRAMCFG_HYB_MEMCLK_1200, 1200, 2400, 4800 },
1584 	{ UMC_DRAMCFG_HYB_MEMCLK_1375, 1375, 2750, 5500 },
1585 	{ UMC_DRAMCFG_HYB_MEMCLK_1500, 1500, 3000, 6000 },
1586 	{ UMC_DRAMCFG_HYB_MEMCLK_1600, 1600, 3200, 6400 }
1588 };
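
/*
 * For example, a DDR4 DRAM configuration register reporting
 * UMC_DRAMCFG_DDR4_MEMCLK_1600 corresponds to a 1600 MHz memory clock and a
 * 3200 MT/s (DDR4-3200) data rate. A hedged sketch of consulting the table
 * (the helper name is illustrative, not the driver's actual lookup path):
 *
 *	static uint32_t
 *	zen_umc_ddr4_mts(uint32_t reg)
 *	{
 *		for (uint_t i = 0; i < ARRAY_SIZE(zen_umc_ddr4_map); i++) {
 *			if (zen_umc_ddr4_map[i].zufm_reg == reg)
 *				return (zen_umc_ddr4_map[i].zufm_mts2);
 *		}
 *		return (0);
 *	}
 */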
1589 
1590 static boolean_t
1591 zen_umc_identify(zen_umc_t *umc)
1592 {
1593 	for (uint_t i = 0; i < ARRAY_SIZE(zen_umc_fam_data); i++) {
1594 		if (zen_umc_fam_data[i].zufd_family == umc->umc_family) {
1595 			umc->umc_fdata = &zen_umc_fam_data[i];
1596 			return (B_TRUE);
1597 		}
1598 	}
1599 
1600 	return (B_FALSE);
1601 }
1602 
1603 /*
1604  * This operates on DFv2, DFv3, and DFv3.5 DRAM rules, which generally speaking
1605  * are in similar register locations and meanings, but the size of bits in
1606  * memory is not consistent.
1607  */
1608 static int
1609 zen_umc_read_dram_rule_df_23(zen_umc_t *umc, const uint_t dfno,
1610     const uint_t inst, const uint_t ruleno, df_dram_rule_t *rule)
1611 {
1612 	int ret;
1613 	uint32_t base, limit;
1614 	uint64_t dbase, dlimit;
1615 	uint16_t addr_ileave, chan_ileave, sock_ileave, die_ileave, dest;
1616 	boolean_t hash = B_FALSE;
1617 	zen_umc_df_t *df = &umc->umc_dfs[dfno];
1618 
1619 	if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_BASE_V2(ruleno),
1620 	    &base)) != 0) {
1621 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM base "
1622 		    "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1623 		return (ret);
1624 	}
1625 
1626 	if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_LIMIT_V2(ruleno),
1627 	    &limit)) != 0) {
1628 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM limit "
1629 		    "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1630 		return (ret);
1631 	}
1632 
1633 
1634 	rule->ddr_raw_base = base;
1635 	rule->ddr_raw_limit = limit;
1636 	rule->ddr_raw_ileave = rule->ddr_raw_ctrl = 0;
1637 
1638 	if (!DF_DRAM_BASE_V2_GET_VALID(base)) {
1639 		return (0);
1640 	}
1641 
1642 	/*
1643 	 * Extract all values from the registers and then normalize. While there
1644 	 * are often different bit patterns for the values, the interpretation
1645 	 * is the same across all the Zen 1-3 parts. That is while which bits
1646 	 * may be used for say channel interleave vary, the values of them are
1647 	 * consistent.
1648 	 */
1649 	rule->ddr_flags |= DF_DRAM_F_VALID;
1650 	if (DF_DRAM_BASE_V2_GET_HOLE_EN(base)) {
1651 		rule->ddr_flags |= DF_DRAM_F_HOLE;
1652 	}
1653 
1654 	dbase = DF_DRAM_BASE_V2_GET_BASE(base);
1655 	dlimit = DF_DRAM_LIMIT_V2_GET_LIMIT(limit);
1656 	switch (umc->umc_df_rev) {
1657 	case DF_REV_2:
1658 		addr_ileave = DF_DRAM_BASE_V2_GET_ILV_ADDR(base);
1659 		chan_ileave = DF_DRAM_BASE_V2_GET_ILV_CHAN(base);
1660 		die_ileave = DF_DRAM_LIMIT_V2_GET_ILV_DIE(limit);
1661 		sock_ileave = DF_DRAM_LIMIT_V2_GET_ILV_SOCK(limit);
1662 		dest = DF_DRAM_LIMIT_V2_GET_DEST_ID(limit);
1663 		break;
1664 	case DF_REV_3:
1665 		addr_ileave = DF_DRAM_BASE_V3_GET_ILV_ADDR(base);
1666 		sock_ileave = DF_DRAM_BASE_V3_GET_ILV_SOCK(base);
1667 		die_ileave = DF_DRAM_BASE_V3_GET_ILV_DIE(base);
1668 		chan_ileave = DF_DRAM_BASE_V3_GET_ILV_CHAN(base);
1669 		dest = DF_DRAM_LIMIT_V3_GET_DEST_ID(limit);
1670 		break;
1671 	case DF_REV_3P5:
1672 		addr_ileave = DF_DRAM_BASE_V3P5_GET_ILV_ADDR(base);
1673 		sock_ileave = DF_DRAM_BASE_V3P5_GET_ILV_SOCK(base);
1674 		die_ileave = DF_DRAM_BASE_V3P5_GET_ILV_DIE(base);
1675 		chan_ileave = DF_DRAM_BASE_V3P5_GET_ILV_CHAN(base);
1676 		dest = DF_DRAM_LIMIT_V3P5_GET_DEST_ID(limit);
1677 		break;
1678 	default:
1679 		dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported "
1680 		    "DF revision processing DRAM rules: 0x%x", umc->umc_df_rev);
1681 		return (-1);
1682 	}
1683 
1684 	rule->ddr_base = dbase << DF_DRAM_BASE_V2_BASE_SHIFT;
1685 	rule->ddr_sock_ileave_bits = sock_ileave;
1686 	rule->ddr_die_ileave_bits = die_ileave;
1687 	switch (addr_ileave) {
1688 	case DF_DRAM_ILV_ADDR_8:
1689 	case DF_DRAM_ILV_ADDR_9:
1690 	case DF_DRAM_ILV_ADDR_10:
1691 	case DF_DRAM_ILV_ADDR_11:
1692 	case DF_DRAM_ILV_ADDR_12:
1693 		break;
1694 	default:
1695 		dev_err(umc->umc_dip, CE_WARN, "!encountered invalid address "
1696 		    "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno,
1697 		    dfno, inst, addr_ileave);
1698 		return (EINVAL);
1699 	}
1700 	rule->ddr_addr_start = DF_DRAM_ILV_ADDR_BASE + addr_ileave;
1701 
1702 	switch (chan_ileave) {
1703 	case DF_DRAM_BASE_V2_ILV_CHAN_1:
1704 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_1CH;
1705 		break;
1706 	case DF_DRAM_BASE_V2_ILV_CHAN_2:
1707 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_2CH;
1708 		break;
1709 	case DF_DRAM_BASE_V2_ILV_CHAN_4:
1710 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_4CH;
1711 		break;
1712 	case DF_DRAM_BASE_V2_ILV_CHAN_8:
1713 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_8CH;
1714 		break;
1715 	case DF_DRAM_BASE_V2_ILV_CHAN_6:
1716 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_6CH;
1717 		break;
1718 	case DF_DRAM_BASE_V2_ILV_CHAN_COD4_2:
1719 		hash = B_TRUE;
1720 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD4_2CH;
1721 		break;
1722 	case DF_DRAM_BASE_V2_ILV_CHAN_COD2_4:
1723 		hash = B_TRUE;
1724 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD2_4CH;
1725 		break;
1726 	case DF_DRAM_BASE_V2_ILV_CHAN_COD1_8:
1727 		hash = B_TRUE;
1728 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD1_8CH;
1729 		break;
1730 	default:
1731 		dev_err(umc->umc_dip, CE_WARN, "!encountered invalid channel "
1732 		    "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno,
1733 		    dfno, inst, chan_ileave);
1734 		return (EINVAL);
1735 	}
1736 
1737 	/*
1738 	 * If hashing is enabled, note which hashing rules apply to this
1739 	 * address. This is done to smooth over the differences between DFv3 and
1740 	 * DFv4, where the flags are in the rules themselves in the latter, but
1741 	 * global today.
1742 	 */
1743 	if (hash) {
1744 		if ((df->zud_flags & ZEN_UMC_DF_F_HASH_16_18) != 0) {
1745 			rule->ddr_flags |= DF_DRAM_F_HASH_16_18;
1746 		}
1747 
1748 		if ((df->zud_flags & ZEN_UMC_DF_F_HASH_21_23) != 0) {
1749 			rule->ddr_flags |= DF_DRAM_F_HASH_21_23;
1750 		}
1751 
1752 		if ((df->zud_flags & ZEN_UMC_DF_F_HASH_30_32) != 0) {
1753 			rule->ddr_flags |= DF_DRAM_F_HASH_30_32;
1754 		}
1755 	}
1756 
1757 	/*
1758 	 * While DFv4 makes remapping explicit, it is basically always enabled
1759 	 * and used on supported platforms prior to that point. So flag such
1760 	 * supported platforms as ones that need to do this. On those systems
1761 	 * there is only one set of remap rules for an entire DF that are
1762 	 * determined based on the target socket. To indicate that we use the
1763 	 * DF_DRAM_F_REMAP_SOCK flag below and skip setting a remap target.
1764 	 */
1765 	if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_TARG_REMAP) != 0) {
1766 		rule->ddr_flags |= DF_DRAM_F_REMAP_EN | DF_DRAM_F_REMAP_SOCK;
1767 	}
1768 
1769 	rule->ddr_limit = (dlimit << DF_DRAM_LIMIT_V2_LIMIT_SHIFT) +
1770 	    DF_DRAM_LIMIT_V2_LIMIT_EXCL;
1771 	rule->ddr_dest_fabid = dest;
1772 
1773 	return (0);
1774 }
1775 
1776 static int
1777 zen_umc_read_dram_rule_df_4(zen_umc_t *umc, const uint_t dfno,
1778     const uint_t inst, const uint_t ruleno, df_dram_rule_t *rule)
1779 {
1780 	int ret;
1781 	uint16_t addr_ileave;
1782 	uint32_t base, limit, ilv, ctl;
1783 
1784 	if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_BASE_V4(ruleno),
1785 	    &base)) != 0) {
1786 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM base "
1787 		    "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1788 		return (ret);
1789 	}
1790 
1791 	if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_LIMIT_V4(ruleno),
1792 	    &limit)) != 0) {
1793 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM limit "
1794 		    "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1795 		return (ret);
1796 	}
1797 
1798 	if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_ILV_V4(ruleno),
1799 	    &ilv)) != 0) {
1800 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM "
1801 		    "interleave register %u on 0x%x/0x%x: %d", ruleno, dfno,
1802 		    inst, ret);
1803 		return (ret);
1804 	}
1805 
1806 	if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_CTL_V4(ruleno),
1807 	    &ctl)) != 0) {
1808 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM control "
1809 		    "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1810 		return (ret);
1811 	}
1812 
1813 	rule->ddr_raw_base = base;
1814 	rule->ddr_raw_limit = limit;
1815 	rule->ddr_raw_ileave = ilv;
1816 	rule->ddr_raw_ctrl = ctl;
1817 
1818 	if (!DF_DRAM_CTL_V4_GET_VALID(ctl)) {
1819 		return (0);
1820 	}
1821 
1822 	rule->ddr_flags |= DF_DRAM_F_VALID;
1823 	rule->ddr_base = DF_DRAM_BASE_V4_GET_ADDR(base);
1824 	rule->ddr_base = rule->ddr_base << DF_DRAM_BASE_V4_BASE_SHIFT;
1825 	rule->ddr_limit = DF_DRAM_LIMIT_V4_GET_ADDR(limit);
1826 	rule->ddr_limit = (rule->ddr_limit << DF_DRAM_LIMIT_V4_LIMIT_SHIFT) +
1827 	    DF_DRAM_LIMIT_V4_LIMIT_EXCL;
1828 	rule->ddr_dest_fabid = DF_DRAM_CTL_V4_GET_DEST_ID(ctl);
1829 
1830 	if (DF_DRAM_CTL_V4_GET_HASH_1G(ctl) != 0) {
1831 		rule->ddr_flags |= DF_DRAM_F_HASH_30_32;
1832 	}
1833 
1834 	if (DF_DRAM_CTL_V4_GET_HASH_2M(ctl) != 0) {
1835 		rule->ddr_flags |= DF_DRAM_F_HASH_21_23;
1836 	}
1837 
1838 	if (DF_DRAM_CTL_V4_GET_HASH_64K(ctl) != 0) {
1839 		rule->ddr_flags |= DF_DRAM_F_HASH_16_18;
1840 	}
1841 
1842 	if (DF_DRAM_CTL_V4_GET_REMAP_EN(ctl) != 0) {
1843 		rule->ddr_flags |= DF_DRAM_F_REMAP_EN;
1844 		rule->ddr_remap_ent = DF_DRAM_CTL_V4_GET_REMAP_SEL(ctl);
1845 	}
1846 
1847 	if (DF_DRAM_CTL_V4_GET_HOLE_EN(ctl) != 0) {
1848 		rule->ddr_flags |= DF_DRAM_F_HOLE;
1849 	}
1850 
1851 	if (DF_DRAM_CTL_V4_GET_SCM(ctl) != 0) {
1852 		rule->ddr_flags |= DF_DRAM_F_SCM;
1853 	}
1854 
1855 	rule->ddr_sock_ileave_bits = DF_DRAM_ILV_V4_GET_SOCK(ilv);
1856 	rule->ddr_die_ileave_bits = DF_DRAM_ILV_V4_GET_DIE(ilv);
1857 	switch (DF_DRAM_ILV_V4_GET_CHAN(ilv)) {
1858 	case DF_DRAM_ILV_V4_CHAN_1:
1859 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_1CH;
1860 		break;
1861 	case DF_DRAM_ILV_V4_CHAN_2:
1862 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_2CH;
1863 		break;
1864 	case DF_DRAM_ILV_V4_CHAN_4:
1865 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_4CH;
1866 		break;
1867 	case DF_DRAM_ILV_V4_CHAN_8:
1868 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_8CH;
1869 		break;
1870 	case DF_DRAM_ILV_V4_CHAN_16:
1871 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_16CH;
1872 		break;
1873 	case DF_DRAM_ILV_V4_CHAN_32:
1874 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_32CH;
1875 		break;
1876 	case DF_DRAM_ILV_V4_CHAN_NPS4_2CH:
1877 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_2CH;
1878 		break;
1879 	case DF_DRAM_ILV_V4_CHAN_NPS2_4CH:
1880 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH;
1881 		break;
1882 	case DF_DRAM_ILV_V4_CHAN_NPS1_8CH:
1883 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH;
1884 		break;
1885 	case DF_DRAM_ILV_V4_CHAN_NPS4_3CH:
1886 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_3CH;
1887 		break;
1888 	case DF_DRAM_ILV_V4_CHAN_NPS2_6CH:
1889 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_6CH;
1890 		break;
1891 	case DF_DRAM_ILV_V4_CHAN_NPS1_12CH:
1892 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_12CH;
1893 		break;
1894 	case DF_DRAM_ILV_V4_CHAN_NPS2_5CH:
1895 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_5CH;
1896 		break;
1897 	case DF_DRAM_ILV_V4_CHAN_NPS1_10CH:
1898 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_10CH;
1899 		break;
1900 	default:
1901 		dev_err(umc->umc_dip, CE_WARN, "!encountered invalid channel "
1902 		    "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno,
1903 		    dfno, inst, DF_DRAM_ILV_V4_GET_CHAN(ilv));
1904 
1905 		break;
1906 	}
1907 
1908 	addr_ileave = DF_DRAM_ILV_V4_GET_ADDR(ilv);
1909 	switch (addr_ileave) {
1910 	case DF_DRAM_ILV_ADDR_8:
1911 	case DF_DRAM_ILV_ADDR_9:
1912 	case DF_DRAM_ILV_ADDR_10:
1913 	case DF_DRAM_ILV_ADDR_11:
1914 	case DF_DRAM_ILV_ADDR_12:
1915 		break;
1916 	default:
1917 		dev_err(umc->umc_dip, CE_WARN, "!encountered invalid address "
1918 		    "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno,
1919 		    dfno, inst, addr_ileave);
1920 		return (EINVAL);
1921 	}
1922 	rule->ddr_addr_start = DF_DRAM_ILV_ADDR_BASE + addr_ileave;
1923 
1924 	return (0);
1925 }
1926 
1927 static int
1928 zen_umc_read_dram_rule_df_4d2(zen_umc_t *umc, const uint_t dfno,
1929     const uint_t inst, const uint_t ruleno, df_dram_rule_t *rule)
1930 {
1931 	int ret;
1932 	uint16_t addr_ileave;
1933 	uint32_t base, limit, ilv, ctl;
1934 
1935 	if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_BASE_V4D2(ruleno),
1936 	    &base)) != 0) {
1937 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM base "
1938 		    "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1939 		return (ret);
1940 	}
1941 
1942 	if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_LIMIT_V4D2(ruleno),
1943 	    &limit)) != 0) {
1944 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM limit "
1945 		    "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1946 		return (ret);
1947 	}
1948 
1949 	if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_ILV_V4D2(ruleno),
1950 	    &ilv)) != 0) {
1951 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM "
1952 		    "interleave register %u on 0x%x/0x%x: %d", ruleno, dfno,
1953 		    inst, ret);
1954 		return (ret);
1955 	}
1956 
1957 	if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_CTL_V4D2(ruleno),
1958 	    &ctl)) != 0) {
1959 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM control "
1960 		    "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret);
1961 		return (ret);
1962 	}
1963 
1964 	rule->ddr_raw_base = base;
1965 	rule->ddr_raw_limit = limit;
1966 	rule->ddr_raw_ileave = ilv;
1967 	rule->ddr_raw_ctrl = ctl;
1968 
1969 	if (!DF_DRAM_CTL_V4_GET_VALID(ctl)) {
1970 		return (0);
1971 	}
1972 
1973 	rule->ddr_flags |= DF_DRAM_F_VALID;
1974 	rule->ddr_base = DF_DRAM_BASE_V4_GET_ADDR(base);
1975 	rule->ddr_base = rule->ddr_base << DF_DRAM_BASE_V4_BASE_SHIFT;
1976 	rule->ddr_limit = DF_DRAM_LIMIT_V4_GET_ADDR(limit);
1977 	rule->ddr_limit = (rule->ddr_limit << DF_DRAM_LIMIT_V4_LIMIT_SHIFT) +
1978 	    DF_DRAM_LIMIT_V4_LIMIT_EXCL;
1979 	rule->ddr_dest_fabid = DF_DRAM_CTL_V4D2_GET_DEST_ID(ctl);
1980 
1981 	if (DF_DRAM_CTL_V4D2_GET_HASH_1T(ctl) != 0) {
1982 		rule->ddr_flags |= DF_DRAM_F_HASH_40_42;
1983 	}
1984 
1985 	if (DF_DRAM_CTL_V4_GET_HASH_1G(ctl) != 0) {
1986 		rule->ddr_flags |= DF_DRAM_F_HASH_30_32;
1987 	}
1988 
1989 	if (DF_DRAM_CTL_V4_GET_HASH_2M(ctl) != 0) {
1990 		rule->ddr_flags |= DF_DRAM_F_HASH_21_23;
1991 	}
1992 
1993 	if (DF_DRAM_CTL_V4_GET_HASH_64K(ctl) != 0) {
1994 		rule->ddr_flags |= DF_DRAM_F_HASH_16_18;
1995 	}
1996 
1997 	if (DF_DRAM_CTL_V4D2_GET_HASH_4K(ctl) != 0) {
1998 		rule->ddr_flags |= DF_DRAM_F_HASH_12_14;
1999 	}
2000 
2001 	if (DF_DRAM_CTL_V4_GET_REMAP_EN(ctl) != 0) {
2002 		rule->ddr_flags |= DF_DRAM_F_REMAP_EN;
2003 		rule->ddr_remap_ent = DF_DRAM_CTL_V4D2_GET_REMAP_SEL(ctl);
2004 	}
2005 
2006 	if (DF_DRAM_CTL_V4_GET_HOLE_EN(ctl) != 0) {
2007 		rule->ddr_flags |= DF_DRAM_F_HOLE;
2008 	}
2009 
2010 	if (DF_DRAM_CTL_V4_GET_SCM(ctl) != 0) {
2011 		rule->ddr_flags |= DF_DRAM_F_SCM;
2012 	}
2013 
2014 	rule->ddr_sock_ileave_bits = DF_DRAM_ILV_V4_GET_SOCK(ilv);
2015 	rule->ddr_die_ileave_bits = DF_DRAM_ILV_V4_GET_DIE(ilv);
2016 	switch (DF_DRAM_ILV_V4D2_GET_CHAN(ilv)) {
2017 	case DF_DRAM_ILV_V4D2_CHAN_1:
2018 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_1CH;
2019 		break;
2020 	case DF_DRAM_ILV_V4D2_CHAN_2:
2021 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_2CH;
2022 		break;
2023 	case DF_DRAM_ILV_V4D2_CHAN_4:
2024 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_4CH;
2025 		break;
2026 	case DF_DRAM_ILV_V4D2_CHAN_8:
2027 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_8CH;
2028 		break;
2029 	case DF_DRAM_ILV_V4D2_CHAN_16:
2030 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_16CH;
2031 		break;
2032 	case DF_DRAM_ILV_V4D2_CHAN_32:
2033 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_32CH;
2034 		break;
2035 	case DF_DRAM_ILV_V4D2_CHAN_NPS1_16S8CH_1K:
2036 		if (rule->ddr_sock_ileave_bits == 0) {
2037 			rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_16CH_1K;
2038 		} else {
2039 			rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH_1K;
2040 		}
2041 		break;
2042 	case DF_DRAM_ILV_V4D2_CHAN_NPS0_24CH_1K:
2043 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS0_24CH_1K;
2044 		break;
2045 	case DF_DRAM_ILV_V4D2_CHAN_NPS4_2CH_1K:
2046 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_2CH_1K;
2047 		break;
2048 	case DF_DRAM_ILV_V4D2_CHAN_NPS2_4CH_1K:
2049 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH_1K;
2050 		break;
2051 	case DF_DRAM_ILV_V4D2_CHAN_NPS1_8S4CH_1K:
2052 		if (rule->ddr_sock_ileave_bits == 0) {
2053 			rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH_1K;
2054 		} else {
2055 			rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH_1K;
2056 		}
2057 		break;
2058 	case DF_DRAM_ILV_V4D2_CHAN_NPS4_3CH_1K:
2059 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_3CH_1K;
2060 		break;
2061 	case DF_DRAM_ILV_V4D2_CHAN_NPS2_6CH_1K:
2062 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_6CH_1K;
2063 		break;
2064 	case DF_DRAM_ILV_V4D2_CHAN_NPS1_12CH_1K:
2065 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_12CH_1K;
2066 		break;
2067 	case DF_DRAM_ILV_V4D2_CHAN_NPS2_5CH_1K:
2068 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_5CH_1K;
2069 		break;
2070 	case DF_DRAM_ILV_V4D2_CHAN_NPS1_10CH_1K:
2071 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_10CH_1K;
2072 		break;
2073 	case DF_DRAM_ILV_V4D2_CHAN_MI3H_8CH:
2074 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_MI3H_8CH;
2075 		break;
2076 	case DF_DRAM_ILV_V4D2_CHAN_MI3H_16CH:
2077 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_MI3H_16CH;
2078 		break;
2079 	case DF_DRAM_ILV_V4D2_CHAN_MI3H_32CH:
2080 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_MI3H_32CH;
2081 		break;
2082 	case DF_DRAM_ILV_V4D2_CHAN_NPS4_2CH_2K:
2083 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_2CH_2K;
2084 		break;
2085 	case DF_DRAM_ILV_V4D2_CHAN_NPS2_4CH_2K:
2086 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH_2K;
2087 		break;
2088 	case DF_DRAM_ILV_V4D2_CHAN_NPS1_8S4CH_2K:
2089 		if (rule->ddr_sock_ileave_bits == 0) {
2090 			rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH_2K;
2091 		} else {
2092 			rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH_2K;
2093 		}
2094 		break;
2095 	case DF_DRAM_ILV_V4D2_CHAN_NPS1_16S8CH_2K:
2096 		if (rule->ddr_sock_ileave_bits == 0) {
2097 			rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_16CH_2K;
2098 		} else {
2099 			rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH_2K;
2100 		}
2101 		break;
2102 	case DF_DRAM_ILV_V4D2_CHAN_NPS4_3CH_2K:
2103 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_3CH_2K;
2104 		break;
2105 	case DF_DRAM_ILV_V4D2_CHAN_NPS2_6CH_2K:
2106 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_6CH_2K;
2107 		break;
2108 	case DF_DRAM_ILV_V4D2_CHAN_NPS1_12CH_2K:
2109 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_12CH_2K;
2110 		break;
2111 	case DF_DRAM_ILV_V4D2_CHAN_NPS0_24CH_2K:
2112 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS0_24CH_2K;
2113 		break;
2114 	case DF_DRAM_ILV_V4D2_CHAN_NPS2_5CH_2K:
2115 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_5CH_2K;
2116 		break;
2117 	case DF_DRAM_ILV_V4D2_CHAN_NPS2_10CH_2K:
2118 		rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_10CH_2K;
2119 		break;
2120 	default:
2121 		dev_err(umc->umc_dip, CE_WARN, "!encountered invalid channel "
2122 		    "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno,
2123 		    dfno, inst, DF_DRAM_ILV_V4D2_GET_CHAN(ilv));
2124 		break;
2125 	}
2126 
2127 	addr_ileave = DF_DRAM_ILV_V4_GET_ADDR(ilv);
2128 	switch (addr_ileave) {
2129 	case DF_DRAM_ILV_ADDR_8:
2130 	case DF_DRAM_ILV_ADDR_9:
2131 	case DF_DRAM_ILV_ADDR_10:
2132 	case DF_DRAM_ILV_ADDR_11:
2133 	case DF_DRAM_ILV_ADDR_12:
2134 		break;
2135 	default:
2136 		dev_err(umc->umc_dip, CE_WARN, "!encountered invalid address "
2137 		    "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno,
2138 		    dfno, inst, addr_ileave);
2139 		return (EINVAL);
2140 	}
2141 	rule->ddr_addr_start = DF_DRAM_ILV_ADDR_BASE + addr_ileave;
2142 
2143 	return (0);
2144 }
2145 
2146 static int
2147 zen_umc_read_dram_rule(zen_umc_t *umc, const uint_t dfno, const uint_t instid,
2148     const uint_t ruleno, df_dram_rule_t *rule)
2149 {
2150 	int ret;
2151 
2152 	switch (umc->umc_df_rev) {
2153 	case DF_REV_2:
2154 	case DF_REV_3:
2155 	case DF_REV_3P5:
2156 		ret = zen_umc_read_dram_rule_df_23(umc, dfno, instid, ruleno,
2157 		    rule);
2158 		break;
2159 	case DF_REV_4:
2160 		ret = zen_umc_read_dram_rule_df_4(umc, dfno, instid, ruleno,
2161 		    rule);
2162 		break;
2163 	case DF_REV_4D2:
2164 		ret = zen_umc_read_dram_rule_df_4d2(umc, dfno, instid, ruleno,
2165 		    rule);
2166 		break;
2167 	default:
2168 		dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported "
2169 		    "DF revision processing DRAM rules: 0x%x", umc->umc_df_rev);
2170 		return (-1);
2171 	}
2172 
2173 	if (ret != 0) {
2174 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM "
2175 		    "rule %u on df/inst 0x%x/0x%x: %d", ruleno,
2176 		    dfno, instid, ret);
2177 		return (-1);
2178 	}
2179 
2180 	return (0);
2181 }
2182 
2183 /*
2184  * The Extended remapper has up to 4 remap rule sets. Each set addresses up to
2185  * 16 remap rules (ala DFv4), but the width of the targets is larger so they are
2186  * all split up amongst 3 registers instead. CPUs indicate support for this in
2187  * the DF::DfCapability register. Not all CPUs actually use all such entries. We
2188  * will read all entries, even if they are not in the PPR, with the assumption
2189  * that a CPU DRAM rule will only ever refer to the ones that exist for the
2190  * moment. Our expectation is that these reserved registers are all 0s or all
2191  * 1s, but that has yet to be proven.
2192  */
2193 static int
2194 zen_umc_read_extremap(zen_umc_t *umc, zen_umc_df_t *df, const uint_t instid)
2195 {
2196 	const uint_t dfno = df->zud_dfno;
2197 	const df_reg_def_t remapA[ZEN_UMC_MAX_CS_REMAPS] = {
2198 	    DF_CS_REMAP0A_V4D2, DF_CS_REMAP1A_V4D2, DF_CS_REMAP2A_V4D2,
2199 	    DF_CS_REMAP3A_V4D2 };
2200 	const df_reg_def_t remapB[ZEN_UMC_MAX_CS_REMAPS] = {
2201 	    DF_CS_REMAP0B_V4D2, DF_CS_REMAP1B_V4D2, DF_CS_REMAP2B_V4D2,
2202 	    DF_CS_REMAP3B_V4D2 };
2203 	const df_reg_def_t remapC[ZEN_UMC_MAX_CS_REMAPS] = {
2204 	    DF_CS_REMAP0C_V4D2, DF_CS_REMAP1C_V4D2, DF_CS_REMAP2C_V4D2,
2205 	    DF_CS_REMAP3C_V4D2 };
2206 
2207 	df->zud_cs_nremap = ZEN_UMC_MAX_CS_REMAPS;
2208 	for (uint_t i = 0; i < df->zud_cs_nremap; i++) {
2209 		int ret;
2210 		uint32_t rm[3];
2211 		zen_umc_cs_remap_t *remap = &df->zud_remap[i];
2212 
2213 		if ((ret = amdzen_c_df_read32(dfno, instid, remapA[i],
2214 		    &rm[0])) != 0) {
2215 			dev_err(umc->umc_dip, CE_WARN, "!failed to read "
2216 			    "df/inst 0x%x/0x%x remap rule %uA: %d", dfno,
2217 			    instid, i, ret);
2218 			return (-1);
2219 		}
2220 
2221 		if ((ret = amdzen_c_df_read32(dfno, instid, remapB[i],
2222 		    &rm[1])) != 0) {
2223 			dev_err(umc->umc_dip, CE_WARN, "!failed to read "
2224 			    "df/inst 0x%x/0x%x remap rule %uB: %d", dfno,
2225 			    instid, i, ret);
2226 			return (-1);
2227 		}
2228 
2229 		if ((ret = amdzen_c_df_read32(dfno, instid, remapC[i],
2230 		    &rm[2])) != 0) {
2231 			dev_err(umc->umc_dip, CE_WARN, "!failed to read "
2232 			    "df/inst 0x%x/0x%x remap rule %uC: %d", dfno,
2233 			    instid, i, ret);
2234 			return (-1);
2235 		}
2236 
2237 		/*
2238 		 * Remap rule A has CS 0-5, B 6-11, C 12-15
2239 		 */
2240 		remap->csr_nremaps = ZEN_UMC_MAX_REMAP_ENTS;
2241 		for (uint_t ent = 0; ent < remap->csr_nremaps; ent++) {
2242 			uint_t reg = ent / ZEN_UMC_REMAP_PER_REG_4D2;
2243 			uint_t idx = ent % ZEN_UMC_REMAP_PER_REG_4D2;
2244 			remap->csr_remaps[ent] =
2245 			    DF_CS_REMAP_GET_CSX_V4B(rm[reg], idx);
2246 		}
2247 	}
2248 	return (0);
2249 }
2250 
2251 static int
2252 zen_umc_read_remap(zen_umc_t *umc, zen_umc_df_t *df, const uint_t instid)
2253 {
2254 	uint_t nremaps, nents;
2255 	const uint_t dfno = df->zud_dfno;
2256 	const df_reg_def_t milan_remap0[ZEN_UMC_MILAN_CS_NREMAPS] = {
2257 	    DF_SKT0_CS_REMAP0_V3, DF_SKT1_CS_REMAP0_V3 };
2258 	const df_reg_def_t milan_remap1[ZEN_UMC_MILAN_CS_NREMAPS] = {
2259 	    DF_SKT0_CS_REMAP1_V3, DF_SKT1_CS_REMAP1_V3 };
2260 	const df_reg_def_t dfv4_remapA[ZEN_UMC_MAX_CS_REMAPS] = {
2261 	    DF_CS_REMAP0A_V4, DF_CS_REMAP1A_V4, DF_CS_REMAP2A_V4,
2262 	    DF_CS_REMAP3A_V4 };
2263 	const df_reg_def_t dfv4_remapB[ZEN_UMC_MAX_CS_REMAPS] = {
2264 	    DF_CS_REMAP0B_V4, DF_CS_REMAP1B_V4, DF_CS_REMAP2B_V4,
2265 	    DF_CS_REMAP3B_V4 };
2266 	const df_reg_def_t *remapA, *remapB;
2267 
2268 
2269 	switch (umc->umc_df_rev) {
2270 	case DF_REV_3:
2271 		nremaps = ZEN_UMC_MILAN_CS_NREMAPS;
2272 		nents = ZEN_UMC_MILAN_REMAP_ENTS;
2273 		remapA = milan_remap0;
2274 		remapB = milan_remap1;
2275 		break;
2276 	case DF_REV_4:
2277 		nremaps = ZEN_UMC_MAX_CS_REMAPS;
2278 		nents = ZEN_UMC_MAX_REMAP_ENTS;
2279 		remapA = dfv4_remapA;
2280 		remapB = dfv4_remapB;
2281 		break;
2282 	case DF_REV_4D2:
2283 		return (zen_umc_read_extremap(umc, df, instid));
2284 	default:
2285 		dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported DF "
2286 		    "revision processing remap rules: 0x%x", umc->umc_df_rev);
2287 		return (-1);
2288 	}
2289 
2290 	df->zud_cs_nremap = nremaps;
2291 	for (uint_t i = 0; i < nremaps; i++) {
2292 		int ret;
2293 		uint32_t rm[2];
2294 		zen_umc_cs_remap_t *remap = &df->zud_remap[i];
2295 
2296 		if ((ret = amdzen_c_df_read32(dfno, instid, remapA[i],
2297 		    &rm[0])) != 0) {
2298 			dev_err(umc->umc_dip, CE_WARN, "!failed to read "
2299 			    "df/inst 0x%x/0x%x remap socket %u-0/A: %d", dfno,
2300 			    instid, i, ret);
2301 			return (-1);
2302 		}
2303 
2304 		if ((ret = amdzen_c_df_read32(dfno, instid, remapB[i],
2305 		    &rm[1])) != 0) {
2306 			dev_err(umc->umc_dip, CE_WARN, "!failed to read "
2307 			    "df/inst 0x%x/0x%x remap socket %u-1/B: %d", dfno,
2308 			    instid, i, ret);
2309 			return (-1);
2310 		}
2311 
2312 		remap->csr_nremaps = nents;
2313 		for (uint_t ent = 0; ent < remap->csr_nremaps; ent++) {
2314 			uint_t reg = ent / ZEN_UMC_REMAP_PER_REG;
2315 			uint_t idx = ent % ZEN_UMC_REMAP_PER_REG;
2316 			remap->csr_remaps[ent] = DF_CS_REMAP_GET_CSX(rm[reg],
2317 			    idx);
2318 		}
2319 	}
2320 
2321 	return (0);
2322 }
2323 
2324 /*
2325  * Now that we have a CCM, we have several different tasks ahead of us:
2326  *
2327  *   o Determine whether or not the DRAM hole is valid.
2328  *   o Snapshot all of the system address rules and translate them into our
2329  *     generic format.
2330  *   o Determine if there are any rules to retarget things (currently
2331  *     Milan/Genoa).
2332  *   o Determine if there are any other hashing rules enabled.
2333  *
2334  * We only require this from a single CCM as these are currently required to be
2335  * the same across all of them.
2336  */
2337 static int
2338 zen_umc_fill_ccm_cb(const uint_t dfno, const uint32_t fabid,
2339     const uint32_t instid, void *arg)
2340 {
2341 	zen_umc_t *umc = arg;
2342 	zen_umc_df_t *df = &umc->umc_dfs[dfno];
2343 	df_reg_def_t hole;
2344 	int ret;
2345 	uint32_t val;
2346 
2347 	df->zud_dfno = dfno;
2348 	df->zud_ccm_inst = instid;
2349 
2350 	/*
2351 	 * Read the DF::DfCapability register. This is not instance specific.
2352 	 */
2353 	if ((ret = amdzen_c_df_read32_bcast(dfno, DF_CAPAB, &df->zud_capab)) !=
2354 	    0) {
2355 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DF Capability "
2356 		    "register: %d", ret);
2357 		return (-1);
2358 	}
2359 
2360 	/*
2361 	 * Next get the DRAM hole. This has the same layout, albeit different
2362 	 * registers across our different platforms.
2363 	 */
2364 	switch (umc->umc_df_rev) {
2365 	case DF_REV_2:
2366 	case DF_REV_3:
2367 	case DF_REV_3P5:
2368 		hole = DF_DRAM_HOLE_V2;
2369 		break;
2370 	case DF_REV_4:
2371 	case DF_REV_4D2:
2372 		hole = DF_DRAM_HOLE_V4;
2373 		break;
2374 	default:
2375 		dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported "
2376 		    "DF version: 0x%x", umc->umc_df_rev);
2377 		return (-1);
2378 	}
2379 
2380 	if ((ret = amdzen_c_df_read32(dfno, instid, hole, &val)) != 0) {
2381 		dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM Hole: %d",
2382 		    ret);
2383 		return (-1);
2384 	}
2385 
2386 	df->zud_hole_raw = val;
2387 	if (DF_DRAM_HOLE_GET_VALID(val)) {
2388 		uint64_t t;
2389 
2390 		df->zud_flags |= ZEN_UMC_DF_F_HOLE_VALID;
2391 		t = DF_DRAM_HOLE_GET_BASE(val);
2392 		df->zud_hole_base = t << DF_DRAM_HOLE_BASE_SHIFT;
2393 	}
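
	/*
	 * To make the shift above concrete: the hole base is stored in units
	 * of 2^DF_DRAM_HOLE_BASE_SHIFT bytes. On a hypothetical system with a
	 * base of 0xe0000000, physical addresses in [0xe0000000, 4 GiB) are
	 * routed to MMIO rather than DRAM, with the displaced DRAM reclaimed
	 * above 4 GiB.
	 */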
2394 
2395 	/*
2396 	 * Prior to Zen 4, the hash information was global and applied to all
2397 	 * COD rules. Check if we're on such a system and snapshot this
2398 	 * so we can use it during the rule application. Note, this was added in
2399 	 * DFv3.
2400 	 */
2401 	if (umc->umc_df_rev == DF_REV_3 || umc->umc_df_rev == DF_REV_3P5) {
2402 		uint32_t globctl;
2403 
2404 		if ((ret = amdzen_c_df_read32(dfno, instid, DF_GLOB_CTL_V3,
2405 		    &globctl)) != 0) {
2406 			dev_err(umc->umc_dip, CE_WARN, "!failed to read global "
2407 			    "control: %d", ret);
2408 			return (-1);
2409 		}
2410 
2411 		df->zud_glob_ctl_raw = globctl;
2412 		if (DF_GLOB_CTL_V3_GET_HASH_1G(globctl) != 0) {
2413 			df->zud_flags |= ZEN_UMC_DF_F_HASH_30_32;
2414 		}
2415 
2416 		if (DF_GLOB_CTL_V3_GET_HASH_2M(globctl) != 0) {
2417 			df->zud_flags |= ZEN_UMC_DF_F_HASH_21_23;
2418 		}
2419 
2420 		if (DF_GLOB_CTL_V3_GET_HASH_64K(globctl) != 0) {
2421 			df->zud_flags |= ZEN_UMC_DF_F_HASH_16_18;
2422 		}
2423 	}
2424 
2425 	df->zud_dram_nrules = umc->umc_fdata->zufd_dram_nrules;
2426 	for (uint_t i = 0; i < umc->umc_fdata->zufd_dram_nrules; i++) {
2427 		if (zen_umc_read_dram_rule(umc, dfno, instid, i,
2428 		    &df->zud_rules[i]) != 0) {
2429 			return (-1);
2430 		}
2431 	}
2432 
2433 	/*
2434 	 * Once AMD got past DF v4.0 there was a feature bit that indicates
2435 	 * support for the remapping engine in the DF_CAPAB (DF::DfCapability)
2436 	 * register. Prior to that we must use our table.
2437 	 */
2438 	if ((umc->umc_df_rev >= DF_REV_4D2 &&
2439 	    DF_CAPAB_GET_EXTCSREMAP(df->zud_capab) != 0) ||
2440 	    (umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_TARG_REMAP) != 0) {
2441 		if (zen_umc_read_remap(umc, df, instid) != 0) {
2442 			return (-1);
2443 		}
2444 	}
2445 
2446 	/*
2447 	 * We only want a single entry, so always return 1 to terminate us
2448 	 * early.
2449 	 */
2450 	return (1);
2451 }
2452 
2453 /*
2454  * At this point we can go through and calculate the size of the DIMM that we've
2455  * found. While it would be nice to determine this from the SPD data, we can
2456  * figure this out entirely based upon the information in the memory controller.
2457  *
2458  * This works by first noting that DDR4, LPDDR4, DDR5, and LPDDR5 are all built
2459  * around 64-bit data channels. This means that each row and column provides up
2460  * to 64 bits (ignoring ECC) of data. There are a number of banks and bank groups.
2461  * The memory controller tracks the total number of bits that are used for each.
2462  * While DDR5 introduces sub-channels, we don't need to worry about those here,
2463  * because ultimately the sub-channel just splits the 64-bit bus we're assuming
2464  * into 2x 32-bit buses. While they can be independently selected, they should
2465  * have equivalent capacities.
2466  *
2467  * The most confusing part of this is that there is one of these related to each
2468  * rank on the device. The UMC natively has two 'chip-selects', each of which is
2469  * used to correspond to a rank. There are then separately multiple rm bits in
2470  * each chip-select. As far as we can tell the PSP or SMU programs the number of
2471  * rm bits to be zero when you have a dual-rank device.
2472  *
2473  * We end up summing each chip-select rather than assuming that the chip-selects
2474  * are identical. In theory some amount of asymmetric DIMMs exist in the wild,
2475  * but we don't know of many systems using them.
2476  */
2477 static void
2478 zen_umc_calc_dimm_size(umc_dimm_t *dimm)
2479 {
2480 	dimm->ud_dimm_size = 0;
2481 	for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_BASE; i++) {
2482 		uint64_t nrc;
2483 		const umc_cs_t *cs = &dimm->ud_cs[i];
2484 
2485 		if (!cs->ucs_base.udb_valid && !cs->ucs_sec.udb_valid) {
2486 			continue;
2487 		}
2488 
2489 		nrc = cs->ucs_nrow_lo + cs->ucs_nrow_hi + cs->ucs_ncol;
2490 		dimm->ud_dimm_size += (8ULL << nrc) * (1 << cs->ucs_nbanks) *
2491 		    (1 << cs->ucs_nrm);
2492 	}
2493 }
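
/*
 * A worked example of the computation above, using illustrative geometry
 * rather than values from any specific part: consider a chip-select with 16
 * row bits, 10 column bits, 4 bank bits (16 banks), and no rank
 * multiplication. Then:
 *
 *	nrc = 16 + 0 + 10 = 26
 *	size = (8ULL << 26) * (1 << 4) * (1 << 0) = 512 MiB * 16 = 8 GiB
 *
 * which matches a rank built from eight 8 Gbit x8 DDR4 devices. A second
 * enabled chip-select with the same geometry would bring the DIMM's total to
 * 16 GiB.
 */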
2494 
2495 /*
2496  * This is used to fill in the common properties about a DIMM. This should occur
2497  * after the rank information has been filled out. The information used is the
2498  * same between DDR4 and DDR5 DIMMs. The only major difference is the register
2499  * offset.
2500  */
2501 static boolean_t
2502 zen_umc_fill_dimm_common(zen_umc_t *umc, zen_umc_df_t *df, zen_umc_chan_t *chan,
2503     const uint_t dimmno, boolean_t ddr4_style)
2504 {
2505 	umc_dimm_t *dimm;
2506 	int ret;
2507 	smn_reg_t reg;
2508 	uint32_t val;
2509 	const uint32_t id = chan->chan_logid;
2510 
2511 	dimm = &chan->chan_dimms[dimmno];
2512 	dimm->ud_dimmno = dimmno;
2513 
2514 	if (ddr4_style) {
2515 		reg = UMC_DIMMCFG_DDR4(id, dimmno);
2516 	} else {
2517 		reg = UMC_DIMMCFG_DDR5(id, dimmno);
2518 	}
2519 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2520 		dev_err(umc->umc_dip, CE_WARN, "failed to read DIMM "
2521 		    "configuration register %x: %d", SMN_REG_ADDR(reg), ret);
2522 		return (B_FALSE);
2523 	}
2524 	dimm->ud_dimmcfg_raw = val;
2525 
2526 	if (UMC_DIMMCFG_GET_X16(val) != 0) {
2527 		dimm->ud_width = UMC_DIMM_W_X16;
2528 	} else if (UMC_DIMMCFG_GET_X4(val) != 0) {
2529 		dimm->ud_width = UMC_DIMM_W_X4;
2530 	} else {
2531 		dimm->ud_width = UMC_DIMM_W_X8;
2532 	}
2533 
2534 	if (UMC_DIMMCFG_GET_3DS(val) != 0) {
2535 		dimm->ud_kind = UMC_DIMM_K_3DS_RDIMM;
2536 	} else if (UMC_DIMMCFG_GET_LRDIMM(val) != 0) {
2537 		dimm->ud_kind = UMC_DIMM_K_LRDIMM;
2538 	} else if (UMC_DIMMCFG_GET_RDIMM(val) != 0) {
2539 		dimm->ud_kind = UMC_DIMM_K_RDIMM;
2540 	} else {
2541 		dimm->ud_kind = UMC_DIMM_K_UDIMM;
2542 	}
2543 
2544 	/*
2545 	 * DIMM information in a UMC can be somewhat confusing, as quite a
2546 	 * number of these registers have non-zero reset values. Flag whether
2547 	 * we think this entry should be usable based on enabled chip-selects.
2548 	 */
2549 	for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_BASE; i++) {
2550 		if (dimm->ud_cs[i].ucs_base.udb_valid ||
2551 		    dimm->ud_cs[i].ucs_sec.udb_valid) {
2552 			dimm->ud_flags |= UMC_DIMM_F_VALID;
2553 			break;
2554 		}
2555 	}
2556 
2557 	/*
2558 	 * The remaining calculations we only want to perform if we have actual
2559 	 * data for a DIMM.
2560 	 */
2561 	if ((dimm->ud_flags & UMC_DIMM_F_VALID) == 0) {
2562 		return (B_TRUE);
2563 	}
2564 
2565 	zen_umc_calc_dimm_size(dimm);
2566 
2567 	return (B_TRUE);
2568 }
2569 
2570 /*
2571  * Fill all the information about a DDR4 DIMM. In the DDR4 UMC, some of this
2572  * information is on a per-chip select basis while at other times it is on a
2573  * per-DIMM basis.  In general, chip-selects 0/1 correspond to DIMM 0, and
2574  * chip-selects 2/3 correspond to DIMM 1. To normalize things with the DDR5 UMC
2575  * which generally has things stored on a per-rank/chip-select basis, we
2576  * duplicate information that is DIMM-wide into the chip-select data structure
2577  * (umc_cs_t).
2578  */
2579 static boolean_t
2580 zen_umc_fill_chan_dimm_ddr4(zen_umc_t *umc, zen_umc_df_t *df,
2581     zen_umc_chan_t *chan, const uint_t dimmno)
2582 {
2583 	umc_dimm_t *dimm;
2584 	umc_cs_t *cs0, *cs1;
2585 	const uint32_t id = chan->chan_logid;
2586 	int ret;
2587 	uint32_t val;
2588 	smn_reg_t reg;
2589 
2590 	ASSERT3U(dimmno, <, ZEN_UMC_MAX_DIMMS);
2591 	dimm = &chan->chan_dimms[dimmno];
2592 	cs0 = &dimm->ud_cs[0];
2593 	cs1 = &dimm->ud_cs[1];
2594 
2595 	/*
2596 	 * DDR4 organization has initial data that exists on a per-chip select
2597 	 * basis. The rest of it is on a per-DIMM basis. First we grab the
2598 	 * per-chip-select data. After this for loop, we will always duplicate
2599 	 * all data that we gather into both chip-selects.
2600 	 */
2601 	for (uint_t i = 0; i < ZEN_UMC_MAX_CS_PER_DIMM; i++) {
2602 		uint64_t addr;
2603 		const uint16_t reginst = i + dimmno * 2;
2604 		reg = UMC_BASE(id, reginst);
2605 		if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2606 			dev_err(umc->umc_dip, CE_WARN, "failed to read base "
2607 			    "register %x: %d", SMN_REG_ADDR(reg), ret);
2608 			return (B_FALSE);
2609 		}
2610 
2611 		addr = (uint64_t)UMC_BASE_GET_ADDR(val) << UMC_BASE_ADDR_SHIFT;
2612 		dimm->ud_cs[i].ucs_base.udb_base = addr;
2613 		dimm->ud_cs[i].ucs_base.udb_valid = UMC_BASE_GET_EN(val);
2614 
2615 		reg = UMC_BASE_SEC(id, reginst);
2616 		if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2617 			dev_err(umc->umc_dip, CE_WARN, "failed to read "
2618 			    "secondary base register %x: %d", SMN_REG_ADDR(reg),
2619 			    ret);
2620 			return (B_FALSE);
2621 		}
2622 
2623 		addr = (uint64_t)UMC_BASE_GET_ADDR(val) << UMC_BASE_ADDR_SHIFT;
2624 		dimm->ud_cs[i].ucs_sec.udb_base = addr;
2625 		dimm->ud_cs[i].ucs_sec.udb_valid = UMC_BASE_GET_EN(val);
2626 	}
2627 
2628 	reg = UMC_MASK_DDR4(id, dimmno);
2629 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2630 		dev_err(umc->umc_dip, CE_WARN, "failed to read mask register "
2631 		    "%x: %d", SMN_REG_ADDR(reg), ret);
2632 		return (B_FALSE);
2633 	}
2634 
2635 	/*
2636 	 * When we extract the masks, hardware only checks a limited range of
2637 	 * bits. Therefore we need to always OR in those lower order bits.
2638 	 */
2639 	cs0->ucs_base_mask = (uint64_t)UMC_MASK_GET_ADDR(val) <<
2640 	    UMC_MASK_ADDR_SHIFT;
2641 	cs0->ucs_base_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1;
2642 	cs1->ucs_base_mask = cs0->ucs_base_mask;
2643 
2644 	reg = UMC_MASK_SEC_DDR4(id, dimmno);
2645 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2646 		dev_err(umc->umc_dip, CE_WARN, "failed to read secondary mask "
2647 		    "register %x: %d", SMN_REG_ADDR(reg), ret);
2648 		return (B_FALSE);
2649 	}
2650 	cs0->ucs_sec_mask = (uint64_t)UMC_MASK_GET_ADDR(val) <<
2651 	    UMC_MASK_ADDR_SHIFT;
2652 	cs0->ucs_sec_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1;
2653 	cs1->ucs_sec_mask = cs0->ucs_sec_mask;
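
	/*
	 * For context, a sketch of how these base/mask pairs are consumed by
	 * the decode logic in zen_umc_decode.c: a normalized channel address
	 * conceptually matches a chip-select when it agrees with the base on
	 * every bit that the mask does not cover, i.e. roughly
	 * (addr & ~mask) == (base & ~mask).
	 */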
2654 
2655 	reg = UMC_ADDRCFG_DDR4(id, dimmno);
2656 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2657 		dev_err(umc->umc_dip, CE_WARN, "failed to read address config "
2658 		    "register %x: %d", SMN_REG_ADDR(reg), ret);
2659 		return (B_FALSE);
2660 	}
2661 
2662 	cs0->ucs_nbanks = UMC_ADDRCFG_GET_NBANK_BITS(val) +
2663 	    UMC_ADDRCFG_NBANK_BITS_BASE;
2664 	cs1->ucs_nbanks = cs0->ucs_nbanks;
2665 	cs0->ucs_ncol = UMC_ADDRCFG_GET_NCOL_BITS(val) +
2666 	    UMC_ADDRCFG_NCOL_BITS_BASE;
2667 	cs1->ucs_ncol = cs0->ucs_ncol;
2668 	cs0->ucs_nrow_hi = UMC_ADDRCFG_DDR4_GET_NROW_BITS_HI(val);
2669 	cs1->ucs_nrow_hi = cs0->ucs_nrow_hi;
2670 	cs0->ucs_nrow_lo = UMC_ADDRCFG_GET_NROW_BITS_LO(val) +
2671 	    UMC_ADDRCFG_NROW_BITS_LO_BASE;
2672 	cs1->ucs_nrow_lo = cs0->ucs_nrow_lo;
2673 	cs0->ucs_nbank_groups = UMC_ADDRCFG_GET_NBANKGRP_BITS(val);
2674 	cs1->ucs_nbank_groups = cs0->ucs_nbank_groups;
2675 	/*
2676 	 * As the chip-select XORs don't always show up, use a dummy value
2677 	 * that'll result in no change occurring here.
2678 	 */
2679 	cs0->ucs_cs_xor = cs1->ucs_cs_xor = 0;
2680 
2681 	/*
2682 	 * APUs don't seem to support various rank select bits.
2683 	 */
2684 	if (umc->umc_fdata->zufd_umc_style == ZEN_UMC_UMC_S_DDR4) {
2685 		cs0->ucs_nrm = UMC_ADDRCFG_DDR4_GET_NRM_BITS(val);
2686 		cs1->ucs_nrm = cs0->ucs_nrm;
2687 	} else {
2688 		cs0->ucs_nrm = cs1->ucs_nrm = 0;
2689 	}
2690 
2691 	reg = UMC_ADDRSEL_DDR4(id, dimmno);
2692 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2693 		dev_err(umc->umc_dip, CE_WARN, "failed to read bank address "
2694 		    "select register %x: %d", SMN_REG_ADDR(reg), ret);
2695 		return (B_FALSE);
2696 	}
2697 	cs0->ucs_row_hi_bit = UMC_ADDRSEL_DDR4_GET_ROW_HI(val) +
2698 	    UMC_ADDRSEL_DDR4_ROW_HI_BASE;
2699 	cs1->ucs_row_hi_bit = cs0->ucs_row_hi_bit;
2700 	cs0->ucs_row_low_bit = UMC_ADDRSEL_GET_ROW_LO(val) +
2701 	    UMC_ADDRSEL_ROW_LO_BASE;
2702 	cs1->ucs_row_low_bit = cs0->ucs_row_low_bit;
2703 	cs0->ucs_bank_bits[0] = UMC_ADDRSEL_GET_BANK0(val) +
2704 	    UMC_ADDRSEL_BANK_BASE;
2705 	cs0->ucs_bank_bits[1] = UMC_ADDRSEL_GET_BANK1(val) +
2706 	    UMC_ADDRSEL_BANK_BASE;
2707 	cs0->ucs_bank_bits[2] = UMC_ADDRSEL_GET_BANK2(val) +
2708 	    UMC_ADDRSEL_BANK_BASE;
2709 	cs0->ucs_bank_bits[3] = UMC_ADDRSEL_GET_BANK3(val) +
2710 	    UMC_ADDRSEL_BANK_BASE;
2711 	cs0->ucs_bank_bits[4] = UMC_ADDRSEL_GET_BANK4(val) +
2712 	    UMC_ADDRSEL_BANK_BASE;
2713 	bcopy(cs0->ucs_bank_bits, cs1->ucs_bank_bits,
2714 	    sizeof (cs0->ucs_bank_bits));
2715 
2716 	reg = UMC_COLSEL_LO_DDR4(id, dimmno);
2717 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2718 		dev_err(umc->umc_dip, CE_WARN, "failed to read column address "
2719 		    "select low register %x: %d", SMN_REG_ADDR(reg), ret);
2720 		return (B_FALSE);
2721 	}
2722 	for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) {
2723 		cs0->ucs_col_bits[i] = UMC_COLSEL_REMAP_GET_COL(val, i) +
2724 		    UMC_COLSEL_LO_BASE;
2725 	}
2726 
2727 	reg = UMC_COLSEL_HI_DDR4(id, dimmno);
2728 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2729 		dev_err(umc->umc_dip, CE_WARN, "failed to read column address "
2730 		    "select high register %x: %d", SMN_REG_ADDR(reg), ret);
2731 		return (B_FALSE);
2732 	}
2733 	for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) {
2734 		cs0->ucs_col_bits[i + ZEN_UMC_MAX_COLSEL_PER_REG] =
2735 		    UMC_COLSEL_REMAP_GET_COL(val, i) + UMC_COLSEL_HI_BASE;
2736 	}
2737 	bcopy(cs0->ucs_col_bits, cs1->ucs_col_bits, sizeof (cs0->ucs_col_bits));
2738 
2739 	/*
2740 	 * The next two registers give us information about a given rank select.
2741 	 * In the APUs, the inversion bits are there; however, the actual bit
2742 	 * selects are not. In this case we read the reserved bits regardless.
2743 	 * They should be ignored because the number of rank-multiplication
2744 	 * bits is zero.
2745 	 */
2746 	reg = UMC_RMSEL_DDR4(id, dimmno);
2747 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2748 		dev_err(umc->umc_dip, CE_WARN, "failed to read rank address "
2749 		    "select register %x: %d", SMN_REG_ADDR(reg), ret);
2750 		return (B_FALSE);
2751 	}
2752 	cs0->ucs_inv_msbs = UMC_RMSEL_DDR4_GET_INV_MSBE(val);
2753 	cs1->ucs_inv_msbs = UMC_RMSEL_DDR4_GET_INV_MSBO(val);
2754 	cs0->ucs_rm_bits[0] = UMC_RMSEL_DDR4_GET_RM0(val) +
2755 	    UMC_RMSEL_BASE;
2756 	cs0->ucs_rm_bits[1] = UMC_RMSEL_DDR4_GET_RM1(val) +
2757 	    UMC_RMSEL_BASE;
2758 	cs0->ucs_rm_bits[2] = UMC_RMSEL_DDR4_GET_RM2(val) +
2759 	    UMC_RMSEL_BASE;
2760 	bcopy(cs0->ucs_rm_bits, cs1->ucs_rm_bits, sizeof (cs0->ucs_rm_bits));
2761 
2762 	reg = UMC_RMSEL_SEC_DDR4(id, dimmno);
2763 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2764 		dev_err(umc->umc_dip, CE_WARN, "failed to read secondary rank "
2765 		    "address select register %x: %d", SMN_REG_ADDR(reg), ret);
2766 		return (B_FALSE);
2767 	}
2768 	cs0->ucs_inv_msbs_sec = UMC_RMSEL_DDR4_GET_INV_MSBE(val);
2769 	cs1->ucs_inv_msbs_sec = UMC_RMSEL_DDR4_GET_INV_MSBO(val);
2770 	cs0->ucs_rm_bits_sec[0] = UMC_RMSEL_DDR4_GET_RM0(val) +
2771 	    UMC_RMSEL_BASE;
2772 	cs0->ucs_rm_bits_sec[1] = UMC_RMSEL_DDR4_GET_RM1(val) +
2773 	    UMC_RMSEL_BASE;
2774 	cs0->ucs_rm_bits_sec[2] = UMC_RMSEL_DDR4_GET_RM2(val) +
2775 	    UMC_RMSEL_BASE;
2776 	bcopy(cs0->ucs_rm_bits_sec, cs1->ucs_rm_bits_sec,
2777 	    sizeof (cs0->ucs_rm_bits_sec));
2778 
2779 	return (zen_umc_fill_dimm_common(umc, df, chan, dimmno, B_TRUE));
2780 }
2781 
2782 /*
2783  * The DDR5 based systems are organized such that almost all the information we
2784  * care about is split between two different chip-select structures in the UMC
2785  * hardware SMN space.
2786  */
2787 static boolean_t
2788 zen_umc_fill_chan_rank_ddr5(zen_umc_t *umc, zen_umc_df_t *df,
2789     zen_umc_chan_t *chan, const uint_t dimmno, const uint_t rankno)
2790 {
2791 	int ret;
2792 	umc_cs_t *cs;
2793 	uint32_t val;
2794 	smn_reg_t reg;
2795 	const uint32_t id = chan->chan_logid;
2796 	const uint32_t regno = dimmno * 2 + rankno;
2797 
2798 	ASSERT3U(dimmno, <, ZEN_UMC_MAX_DIMMS);
2799 	ASSERT3U(rankno, <, ZEN_UMC_MAX_CS_PER_DIMM);
2800 	cs = &chan->chan_dimms[dimmno].ud_cs[rankno];
2801 
2802 	reg = UMC_BASE(id, regno);
2803 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2804 		dev_err(umc->umc_dip, CE_WARN, "failed to read base "
2805 		    "register %x: %d", SMN_REG_ADDR(reg), ret);
2806 		return (B_FALSE);
2807 	}
2808 	cs->ucs_base.udb_base = (uint64_t)UMC_BASE_GET_ADDR(val) <<
2809 	    UMC_BASE_ADDR_SHIFT;
2810 	cs->ucs_base.udb_valid = UMC_BASE_GET_EN(val);
2811 	if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) {
2812 		uint64_t addr;
2813 
2814 		reg = UMC_BASE_EXT_DDR5(id, regno);
2815 		if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) !=
2816 		    0) {
2817 			dev_err(umc->umc_dip, CE_WARN, "failed to read "
2818 			    "extended base register %x: %d", SMN_REG_ADDR(reg),
2819 			    ret);
2820 			return (B_FALSE);
2821 		}
2822 
2823 		addr = (uint64_t)UMC_BASE_EXT_GET_ADDR(val) <<
2824 		    UMC_BASE_EXT_ADDR_SHIFT;
2825 		cs->ucs_base.udb_base |= addr;
2826 	}
2827 
2828 	reg = UMC_BASE_SEC(id, regno);
2829 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2830 		dev_err(umc->umc_dip, CE_WARN, "failed to read secondary base "
2831 		    "register %x: %d", SMN_REG_ADDR(reg), ret);
2832 		return (B_FALSE);
2833 	}
2834 	cs->ucs_sec.udb_base = (uint64_t)UMC_BASE_GET_ADDR(val) <<
2835 	    UMC_BASE_ADDR_SHIFT;
2836 	cs->ucs_sec.udb_valid = UMC_BASE_GET_EN(val);
2837 	if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) {
2838 		uint64_t addr;
2839 
2840 		reg = UMC_BASE_EXT_SEC_DDR5(id, regno);
2841 		if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) !=
2842 		    0) {
2843 			dev_err(umc->umc_dip, CE_WARN, "failed to read "
2844 			    "extended secondary base register %x: %d",
2845 			    SMN_REG_ADDR(reg), ret);
2846 			return (B_FALSE);
2847 		}
2848 
2849 		addr = (uint64_t)UMC_BASE_EXT_GET_ADDR(val) <<
2850 		    UMC_BASE_EXT_ADDR_SHIFT;
2851 		cs->ucs_sec.udb_base |= addr;
2852 	}
2853 
2854 	reg = UMC_MASK_DDR5(id, regno);
2855 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2856 		dev_err(umc->umc_dip, CE_WARN, "failed to read mask "
2857 		    "register %x: %d", SMN_REG_ADDR(reg), ret);
2858 		return (B_FALSE);
2859 	}
2860 	cs->ucs_base_mask = (uint64_t)UMC_MASK_GET_ADDR(val) <<
2861 	    UMC_MASK_ADDR_SHIFT;
2862 	cs->ucs_base_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1;
2863 	if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) {
2864 		uint64_t addr;
2865 
2866 		reg = UMC_MASK_EXT_DDR5(id, regno);
2867 		if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) !=
2868 		    0) {
2869 			dev_err(umc->umc_dip, CE_WARN, "failed to read "
2870 			    "extended mask register %x: %d", SMN_REG_ADDR(reg),
2871 			    ret);
2872 			return (B_FALSE);
2873 		}
2874 
2875 		addr = (uint64_t)UMC_MASK_EXT_GET_ADDR(val) <<
2876 		    UMC_MASK_EXT_ADDR_SHIFT;
2877 		cs->ucs_base_mask |= addr;
2878 	}
2879 
2881 	reg = UMC_MASK_SEC_DDR5(id, regno);
2882 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2883 		dev_err(umc->umc_dip, CE_WARN, "failed to read secondary mask "
2884 		    "register %x: %d", SMN_REG_ADDR(reg), ret);
2885 		return (B_FALSE);
2886 	}
2887 	cs->ucs_sec_mask = (uint64_t)UMC_MASK_GET_ADDR(val) <<
2888 	    UMC_MASK_ADDR_SHIFT;
2889 	cs->ucs_sec_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1;
2890 	if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) {
2891 		uint64_t addr;
2892 
2893 		reg = UMC_MASK_EXT_SEC_DDR5(id, regno);
2894 		if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) !=
2895 		    0) {
2896 			dev_err(umc->umc_dip, CE_WARN, "failed to read "
2897 			    "extended mask register %x: %d", SMN_REG_ADDR(reg),
2898 			    ret);
2899 			return (B_FALSE);
2900 		}
2901 
2902 		addr = (uint64_t)UMC_MASK_EXT_GET_ADDR(val) <<
2903 		    UMC_MASK_EXT_ADDR_SHIFT;
2904 		cs->ucs_sec_mask |= addr;
2905 	}
2906 
2907 	reg = UMC_ADDRCFG_DDR5(id, regno);
2908 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2909 		dev_err(umc->umc_dip, CE_WARN, "failed to read address config "
2910 		    "register %x: %d", SMN_REG_ADDR(reg), ret);
2911 		return (B_FALSE);
2912 	}
2913 	if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_CS_XOR) != 0) {
2914 		cs->ucs_cs_xor = UMC_ADDRCFG_DDR5_GET_CSXOR(val);
2915 	} else {
2916 		cs->ucs_cs_xor = 0;
2917 	}
2918 	cs->ucs_nbanks = UMC_ADDRCFG_GET_NBANK_BITS(val) +
2919 	    UMC_ADDRCFG_NBANK_BITS_BASE;
2920 	cs->ucs_ncol = UMC_ADDRCFG_GET_NCOL_BITS(val) +
2921 	    UMC_ADDRCFG_NCOL_BITS_BASE;
2922 	cs->ucs_nrow_lo = UMC_ADDRCFG_GET_NROW_BITS_LO(val) +
2923 	    UMC_ADDRCFG_NROW_BITS_LO_BASE;
2924 	cs->ucs_nrow_hi = 0;
2925 	cs->ucs_nrm = UMC_ADDRCFG_DDR5_GET_NRM_BITS(val);
2926 	cs->ucs_nbank_groups = UMC_ADDRCFG_GET_NBANKGRP_BITS(val);
2927 
2928 	reg = UMC_ADDRSEL_DDR5(id, regno);
2929 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2930 		dev_err(umc->umc_dip, CE_WARN, "failed to read address select "
2931 		    "register %x: %d", SMN_REG_ADDR(reg), ret);
2932 		return (B_FALSE);
2933 	}
2934 	cs->ucs_row_hi_bit = 0;
2935 	cs->ucs_row_low_bit = UMC_ADDRSEL_GET_ROW_LO(val) +
2936 	    UMC_ADDRSEL_ROW_LO_BASE;
2937 	cs->ucs_bank_bits[4] = UMC_ADDRSEL_GET_BANK4(val) +
2938 	    UMC_ADDRSEL_BANK_BASE;
2939 	cs->ucs_bank_bits[3] = UMC_ADDRSEL_GET_BANK3(val) +
2940 	    UMC_ADDRSEL_BANK_BASE;
2941 	cs->ucs_bank_bits[2] = UMC_ADDRSEL_GET_BANK2(val) +
2942 	    UMC_ADDRSEL_BANK_BASE;
2943 	cs->ucs_bank_bits[1] = UMC_ADDRSEL_GET_BANK1(val) +
2944 	    UMC_ADDRSEL_BANK_BASE;
2945 	cs->ucs_bank_bits[0] = UMC_ADDRSEL_GET_BANK0(val) +
2946 	    UMC_ADDRSEL_BANK_BASE;
2947 
2948 	reg = UMC_COLSEL_LO_DDR5(id, regno);
2949 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2950 		dev_err(umc->umc_dip, CE_WARN, "failed to read column address "
2951 		    "select low register %x: %d", SMN_REG_ADDR(reg), ret);
2952 		return (B_FALSE);
2953 	}
2954 	for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) {
2955 		cs->ucs_col_bits[i] = UMC_COLSEL_REMAP_GET_COL(val, i) +
2956 		    UMC_COLSEL_LO_BASE;
2957 	}
2958 
2959 	reg = UMC_COLSEL_HI_DDR5(id, regno);
2960 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2961 		dev_err(umc->umc_dip, CE_WARN, "failed to read column address "
2962 		    "select high register %x: %d", SMN_REG_ADDR(reg), ret);
2963 		return (B_FALSE);
2964 	}
2965 	for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) {
2966 		cs->ucs_col_bits[i + ZEN_UMC_MAX_COLSEL_PER_REG] =
2967 		    UMC_COLSEL_REMAP_GET_COL(val, i) + UMC_COLSEL_HI_BASE;
2968 	}
2969 
2970 	/*
2971 	 * Time for our friend, the RM Selection register. Like in DDR4 we end
2972 	 * up reading everything here, even though on most platforms several of
2973 	 * these fields are reserved. The intent is that we won't look at the
2974 	 * reserved bits unless something actually points us there.
2975 	 */
2976 	reg = UMC_RMSEL_DDR5(id, regno);
2977 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
2978 		dev_err(umc->umc_dip, CE_WARN, "failed to read rank multiply "
2979 		    "select register %x: %d", SMN_REG_ADDR(reg), ret);
2980 		return (B_FALSE);
2981 	}
2982 
2983 	/*
2984 	 * DDR5 based devices have primary and secondary msbs; however, they
2985 	 * only have a single set of rm bits. To normalize things with the DDR4
2986 	 * subsystem, we copy the primary bits to the secondary so we can use
2987 	 * them the same way in the decoder/encoder.
2988 	 */
2989 	cs->ucs_inv_msbs = UMC_RMSEL_DDR5_GET_INV_MSBS(val);
2990 	cs->ucs_inv_msbs_sec = UMC_RMSEL_DDR5_GET_INV_MSBS_SEC(val);
2991 	cs->ucs_subchan = UMC_RMSEL_DDR5_GET_SUBCHAN(val) +
2992 	    UMC_RMSEL_DDR5_SUBCHAN_BASE;
2993 	cs->ucs_rm_bits[3] = UMC_RMSEL_DDR5_GET_RM3(val) + UMC_RMSEL_BASE;
2994 	cs->ucs_rm_bits[2] = UMC_RMSEL_DDR5_GET_RM2(val) + UMC_RMSEL_BASE;
2995 	cs->ucs_rm_bits[1] = UMC_RMSEL_DDR5_GET_RM1(val) + UMC_RMSEL_BASE;
2996 	cs->ucs_rm_bits[0] = UMC_RMSEL_DDR5_GET_RM0(val) + UMC_RMSEL_BASE;
2997 	bcopy(cs->ucs_rm_bits, cs->ucs_rm_bits_sec,
2998 	    sizeof (cs->ucs_rm_bits));
2999 
3000 	return (zen_umc_fill_dimm_common(umc, df, chan, dimmno, B_FALSE));
3001 }
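
/*
 * A note on the ZEN_UMC_FAM_F_UMC_EADDR handling above: on platforms with
 * this flag, the base and mask values are split across a primary register and
 * an extension register. The extension bits are simply ORed in above the
 * primary field, shifted by UMC_BASE_EXT_ADDR_SHIFT or
 * UMC_MASK_EXT_ADDR_SHIFT as appropriate; the flow is otherwise identical to
 * the non-extended case.
 */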
3002 
3003 static void
3004 zen_umc_fill_ddr_type(zen_umc_t *umc, zen_umc_chan_t *chan)
3005 {
3006 	umc_dimm_type_t dimm = UMC_DIMM_T_UNKNOWN;
3007 	uint8_t val;
3008 
3009 	/*
3010 	 * The different UMC styles split into two groups: those that support
3011 	 * DDR4 and those that support DDR5 (with the hybrid group being in the
3012 	 * DDR5 style camp). While all the values are consistent between the
3013 	 * different styles (e.g. reserved values correspond to unsupported
3014 	 * items), we still check types based on the UMC's design type so if we
3015 	 * see something weird, we don't accidentally use an older value.
3016 	 */
3017 	val = UMC_UMCCFG_GET_DDR_TYPE(chan->chan_umccfg_raw);
3018 	switch (umc->umc_fdata->zufd_umc_style) {
3019 	case ZEN_UMC_UMC_S_DDR4:
3020 	case ZEN_UMC_UMC_S_DDR4_APU:
3021 		switch (val) {
3022 		case UMC_UMCCFG_DDR4_T_DDR4:
3023 			dimm = UMC_DIMM_T_DDR4;
3024 			break;
3025 		case UMC_UMCCFG_DDR4_T_LPDDR4:
3026 			dimm = UMC_DIMM_T_LPDDR4;
3027 			break;
3028 		default:
3029 			break;
3030 		}
3031 		break;
3032 	case ZEN_UMC_UMC_S_HYBRID_LPDDR5:
3033 		switch (val) {
3034 		case UMC_UMCCFG_DDR5_T_LPDDR5:
3035 			dimm = UMC_DIMM_T_LPDDR5;
3036 			break;
3037 		case UMC_UMCCFG_DDR5_T_LPDDR4:
3038 			dimm = UMC_DIMM_T_LPDDR4;
3039 			break;
3040 		default:
3041 			break;
3042 		}
3043 		break;
3044 	case ZEN_UMC_UMC_S_DDR5:
3045 	case ZEN_UMC_UMC_S_DDR5_APU:
3046 		switch (val) {
3047 		case UMC_UMCCFG_DDR5_T_DDR5:
3048 			dimm = UMC_DIMM_T_DDR5;
3049 			break;
3050 		case UMC_UMCCFG_DDR5_T_LPDDR5:
3051 			dimm = UMC_DIMM_T_LPDDR5;
3052 			break;
3053 		default:
3054 			break;
3055 		}
3056 		break;
3057 	}
3058 
3059 	chan->chan_type = dimm;
3060 }
3061 
3062 /*
3063  * Use the DDR4 frequency table to determine the channel's speed. Note that our
3064  * hybrid based UMCs use 8 bits for the clock, while the traditional DDR4 ones
3065  * only use 7. The caller is responsible for using the right mask for the UMC.
3066  */
3067 static void
3068 zen_umc_fill_chan_ddr4(zen_umc_chan_t *chan, uint_t mstate,
3069     const uint32_t clock)
3070 {
3071 	for (size_t i = 0; i < ARRAY_SIZE(zen_umc_ddr4_map); i++) {
3072 		if (clock == zen_umc_ddr4_map[i].zufm_reg) {
3073 			chan->chan_clock[mstate] = zen_umc_ddr4_map[i].zufm_mhz;
3074 			chan->chan_speed[mstate] =
3075 			    zen_umc_ddr4_map[i].zufm_mts2;
3076 			break;
3077 		}
3078 	}
3079 }
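
/*
 * For example, under the mapping above a register value that decodes to a
 * 1600 MHz memory clock yields a 3200 MT/s transfer rate (zufm_mts2), as DDR4
 * moves data on both edges of the clock.
 */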
3080 
3081 static void
3082 zen_umc_fill_chan_hyb_lpddr5(zen_umc_chan_t *chan, uint_t mstate)
3083 {
3084 	const uint32_t reg = chan->chan_dramcfg_raw[mstate];
3085 	const uint32_t wck = UMC_DRAMCFG_HYB_GET_WCLKRATIO(reg);
3086 	const uint32_t clock = UMC_DRAMCFG_HYB_GET_MEMCLK(reg);
3087 	boolean_t twox;
3088 
3089 	switch (wck) {
3090 	case UMC_DRAMCFG_WCLKRATIO_1TO2:
3091 		twox = B_TRUE;
3092 		break;
3093 	case UMC_DRAMCFG_WCLKRATIO_1TO4:
3094 		twox = B_FALSE;
3095 		break;
3096 	default:
3097 		return;
3098 	}
3099 
3100 	for (size_t i = 0; i < ARRAY_SIZE(zen_umc_lpddr5_map); i++) {
3101 		if (clock == zen_umc_lpddr5_map[i].zufm_reg) {
3102 			chan->chan_clock[mstate] =
3103 			    zen_umc_lpddr5_map[i].zufm_mhz;
3104 
3105 			if (twox) {
3106 				chan->chan_speed[mstate] =
3107 				    zen_umc_lpddr5_map[i].zufm_mts2;
3108 			} else {
3109 				chan->chan_speed[mstate] =
3110 				    zen_umc_lpddr5_map[i].zufm_mts4;
3111 			}
3112 			break;
3113 		}
3114 	}
3115 }
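
/*
 * In other words, the WCK:CK ratio selects the multiplier between the memory
 * clock and the LPDDR5 transfer rate: a 1:2 ratio pairs the clock with the
 * table's zufm_mts2 rate, while a 1:4 ratio pairs it with zufm_mts4.
 */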
3116 
3117 /*
3118  * Determine the current operating frequency of the channel. This varies based
3119  * upon the type of UMC that we're operating on as there are multiple ways to
3120  * determine this. There are up to four memory P-states that exist in the UMC.
3121  * This grabs it for a single P-state at a time.
3122  *
3123  * Unlike other things, if we cannot determine the frequency of the clock or
3124  * transfer speed, we do not consider this fatal because that does not stop
3125  * decoding. It only means that we cannot give a bit of useful information to
3126  * topo.
3127  */
3128 static void
3129 zen_umc_fill_chan_freq(zen_umc_t *umc, zen_umc_chan_t *chan, uint_t mstate)
3130 {
3131 	const uint32_t cfg = chan->chan_dramcfg_raw[mstate];
3132 	const umc_dimm_type_t dimm_type = chan->chan_type;
3133 
3134 	switch (umc->umc_fdata->zufd_umc_style) {
3135 	case ZEN_UMC_UMC_S_HYBRID_LPDDR5:
3136 		if (dimm_type == UMC_DIMM_T_LPDDR5) {
3137 			zen_umc_fill_chan_hyb_lpddr5(chan, mstate);
3138 		} else if (dimm_type != UMC_DIMM_T_LPDDR4) {
3139 			zen_umc_fill_chan_ddr4(chan, mstate,
3140 			    UMC_DRAMCFG_HYB_GET_MEMCLK(cfg));
3141 		}
3142 		break;
3143 	case ZEN_UMC_UMC_S_DDR4:
3144 	case ZEN_UMC_UMC_S_DDR4_APU:
3145 		zen_umc_fill_chan_ddr4(chan, mstate,
3146 		    UMC_DRAMCFG_DDR4_GET_MEMCLK(cfg));
3147 		break;
3148 	case ZEN_UMC_UMC_S_DDR5:
3149 	case ZEN_UMC_UMC_S_DDR5_APU:
3150 		chan->chan_clock[mstate] = UMC_DRAMCFG_DDR5_GET_MEMCLK(cfg);
3151 		if (dimm_type == UMC_DIMM_T_DDR5) {
3152 			chan->chan_speed[mstate] = 2 * chan->chan_clock[mstate];
3153 		} else if (dimm_type == UMC_DIMM_T_LPDDR5) {
3154 			switch (UMC_DRAMCFG_LPDDR5_GET_WCKRATIO(cfg)) {
3155 			case UMC_DRAMCFG_WCLKRATIO_1TO2:
3156 				chan->chan_speed[mstate] = 2 *
3157 				    chan->chan_clock[mstate];
3158 				break;
3159 			case UMC_DRAMCFG_WCLKRATIO_1TO4:
3160 				chan->chan_speed[mstate] = 4 *
3161 				    chan->chan_clock[mstate];
3162 				break;
3163 			default:
3164 				break;
3165 			}
3166 		}
3167 		break;
3168 	}
3169 }
3170 
3171 /*
3172  * Fill common channel hashing information. While the locations of many of
3173  * the registers changed between the DDR4-capable and DDR5-capable devices,
3174  * the actual contents are the same so we process them together.
3175  */
3176 static boolean_t
3177 zen_umc_fill_chan_hash(zen_umc_t *umc, zen_umc_df_t *df, zen_umc_chan_t *chan,
3178     boolean_t ddr4)
3179 {
3180 	int ret;
3181 	smn_reg_t reg;
3182 	uint32_t val;
3183 
3184 	const umc_chan_hash_flags_t flags = umc->umc_fdata->zufd_chan_hash;
3185 	const uint32_t id = chan->chan_logid;
3186 	umc_chan_hash_t *chash = &chan->chan_hash;
3187 	chash->uch_flags = flags;
3188 
3189 	if ((flags & UMC_CHAN_HASH_F_BANK) != 0) {
3190 		for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_BANK_HASH; i++) {
3191 			umc_bank_hash_t *bank = &chash->uch_bank_hashes[i];
3192 
3193 			if (ddr4) {
3194 				reg = UMC_BANK_HASH_DDR4(id, i);
3195 			} else {
3196 				reg = UMC_BANK_HASH_DDR5(id, i);
3197 			}
3198 
3199 			if ((ret = amdzen_c_smn_read(df->zud_dfno, reg,
3200 			    &val)) != 0) {
3201 				dev_err(umc->umc_dip, CE_WARN, "failed to read "
3202 				    "bank hash register %x: %d",
3203 				    SMN_REG_ADDR(reg), ret);
3204 				return (B_FALSE);
3205 			}
3206 
3207 			bank->ubh_row_xor = UMC_BANK_HASH_GET_ROW(val);
3208 			bank->ubh_col_xor = UMC_BANK_HASH_GET_COL(val);
3209 			bank->ubh_en = UMC_BANK_HASH_GET_EN(val);
3210 		}
3211 	}
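
	/*
	 * A note on how these masks are used (the decoding itself lives in
	 * zen_umc_decode.c): when a bank hash is enabled, the corresponding
	 * bank bit is XORed with the parity of the row address bits selected
	 * by ubh_row_xor and the column address bits selected by ubh_col_xor.
	 */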
3212 
3213 	if ((flags & UMC_CHAN_HASH_F_RM) != 0) {
3214 		for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_RM_HASH; i++) {
3215 			uint64_t addr;
3216 			umc_addr_hash_t *rm = &chash->uch_rm_hashes[i];
3217 
3218 			if (ddr4) {
3219 				reg = UMC_RANK_HASH_DDR4(id, i);
3220 			} else {
3221 				reg = UMC_RANK_HASH_DDR5(id, i);
3222 			}
3223 
3224 			if ((ret = amdzen_c_smn_read(df->zud_dfno, reg,
3225 			    &val)) != 0) {
3226 				dev_err(umc->umc_dip, CE_WARN, "failed to read "
3227 				    "rm hash register %x: %d",
3228 				    SMN_REG_ADDR(reg), ret);
3229 				return (B_FALSE);
3230 			}
3231 
3232 			addr = UMC_RANK_HASH_GET_ADDR(val);
3233 			rm->uah_addr_xor = addr << UMC_RANK_HASH_SHIFT;
3234 			rm->uah_en = UMC_RANK_HASH_GET_EN(val);
3235 
3236 			if (ddr4 || (umc->umc_fdata->zufd_flags &
3237 			    ZEN_UMC_FAM_F_UMC_EADDR) == 0) {
3238 				continue;
3239 			}
3240 
3241 			reg = UMC_RANK_HASH_EXT_DDR5(id, i);
3242 			if ((ret = amdzen_c_smn_read(df->zud_dfno, reg,
3243 			    &val)) != 0) {
3244 				dev_err(umc->umc_dip, CE_WARN, "failed to read "
3245 				    "rm hash ext register %x: %d",
3246 				    SMN_REG_ADDR(reg), ret);
3247 				return (B_FALSE);
3248 			}
3249 
3250 			addr = UMC_RANK_HASH_EXT_GET_ADDR(val);
3251 			rm->uah_addr_xor |= addr <<
3252 			    UMC_RANK_HASH_EXT_ADDR_SHIFT;
3253 		}
3254 	}
3255 
3256 	if ((flags & UMC_CHAN_HASH_F_PC) != 0) {
3257 		umc_pc_hash_t *pc = &chash->uch_pc_hash;
3258 
3259 		if (ddr4) {
3260 			reg = UMC_PC_HASH_DDR4(id);
3261 		} else {
3262 			reg = UMC_PC_HASH_DDR5(id);
3263 		}
3264 
3265 		if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3266 			dev_err(umc->umc_dip, CE_WARN, "failed to read pc hash "
3267 			    "register %x: %d", SMN_REG_ADDR(reg), ret);
3268 			return (B_FALSE);
3269 		}
3270 
3271 		pc->uph_row_xor = UMC_PC_HASH_GET_ROW(val);
3272 		pc->uph_col_xor = UMC_PC_HASH_GET_COL(val);
3273 		pc->uph_en = UMC_PC_HASH_GET_EN(val);
3274 
3275 		if (ddr4) {
3276 			reg = UMC_PC_HASH2_DDR4(id);
3277 		} else {
3278 			reg = UMC_PC_HASH2_DDR5(id);
3279 		}
3280 
3281 		if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3282 			dev_err(umc->umc_dip, CE_WARN, "failed to read pc hash "
3283 			    "2 register %x: %d", SMN_REG_ADDR(reg), ret);
3284 			return (B_FALSE);
3285 		}
3286 
3287 		pc->uph_bank_xor = UMC_PC_HASH2_GET_BANK(val);
3288 	}
3289 
3290 	if ((flags & UMC_CHAN_HASH_F_CS) != 0) {
3291 		for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_CS_HASH; i++) {
3292 			uint64_t addr;
3293 			umc_addr_hash_t *rm = &chash->uch_cs_hashes[i];
3294 
3295 			if (ddr4) {
3296 				reg = UMC_CS_HASH_DDR4(id, i);
3297 			} else {
3298 				reg = UMC_CS_HASH_DDR5(id, i);
3299 			}
3300 
3301 			if ((ret = amdzen_c_smn_read(df->zud_dfno, reg,
3302 			    &val)) != 0) {
3303 				dev_err(umc->umc_dip, CE_WARN, "failed to read "
3304 				    "cs hash register %x: %d",
				    SMN_REG_ADDR(reg), ret);
3305 				return (B_FALSE);
3306 			}
3307 
3308 			addr = UMC_CS_HASH_GET_ADDR(val);
3309 			rm->uah_addr_xor = addr << UMC_CS_HASH_SHIFT;
3310 			rm->uah_en = UMC_CS_HASH_GET_EN(val);
3311 
3312 			if (ddr4 || (umc->umc_fdata->zufd_flags &
3313 			    ZEN_UMC_FAM_F_UMC_EADDR) == 0) {
3314 				continue;
3315 			}
3316 
3317 			reg = UMC_CS_HASH_EXT_DDR5(id, i);
3318 			if ((ret = amdzen_c_smn_read(df->zud_dfno, reg,
3319 			    &val)) != 0) {
3320 				dev_err(umc->umc_dip, CE_WARN, "failed to read "
3321 				    "cs hash ext register %x: %d",
3322 				    SMN_REG_ADDR(reg), ret);
3323 				return (B_FALSE);
3324 			}
3325 
3326 			addr = UMC_CS_HASH_EXT_GET_ADDR(val);
3327 			rm->uah_addr_xor |= addr << UMC_CS_HASH_EXT_ADDR_SHIFT;
3328 		}
3329 	}
3330 
3331 	return (B_TRUE);
3332 }
3333 
3334 /*
3335  * This fills in settings that we care about which are valid for the entire
3336  * channel and are the same between DDR4/5 capable devices.
3337  */
3338 static boolean_t
3339 zen_umc_fill_chan(zen_umc_t *umc, zen_umc_df_t *df, zen_umc_chan_t *chan)
3340 {
3341 	uint32_t val;
3342 	smn_reg_t reg;
3343 	const uint32_t id = chan->chan_logid;
3344 	int ret;
3345 	boolean_t ddr4;
3346 
3347 	if (umc->umc_fdata->zufd_umc_style == ZEN_UMC_UMC_S_DDR4 ||
3348 	    umc->umc_fdata->zufd_umc_style == ZEN_UMC_UMC_S_DDR4_APU) {
3349 		ddr4 = B_TRUE;
3350 	} else {
3351 		ddr4 = B_FALSE;
3352 	}
3353 
3354 	/*
3355 	 * Begin by gathering all of the information related to hashing. What is
3356 	 * valid here varies based on the actual chip family and then the
3357 	 * registers vary based on DDR4 and DDR5.
3358 	 */
3359 	if (!zen_umc_fill_chan_hash(umc, df, chan, ddr4)) {
3360 		return (B_FALSE);
3361 	}
3362 
3363 	reg = UMC_UMCCFG(id);
3364 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3365 		dev_err(umc->umc_dip, CE_WARN, "failed to read UMC "
3366 		    "configuration register %x: %d", SMN_REG_ADDR(reg), ret);
3367 		return (B_FALSE);
3368 	}
3369 
3370 	chan->chan_umccfg_raw = val;
3371 	if (UMC_UMCCFG_GET_ECC_EN(val)) {
3372 		chan->chan_flags |= UMC_CHAN_F_ECC_EN;
3373 	}
3374 
3375 	/*
3376 	 * The UMC configuration register read above also tells us the type of
3377 	 * DIMM. All DIMMs in the channel must be the same type so we leave
3378 	 * this setting on the channel. Once we have that, we proceed to grab
3379 	 * the DRAM configuration register for each memory P-state, which can
3380 	 * be used to determine the frequency and speed of the memory channel.
3381 	 */
3389 	zen_umc_fill_ddr_type(umc, chan);
3390 	for (uint_t i = 0; i < ZEN_UMC_NMEM_PSTATES; i++) {
3391 		chan->chan_clock[i] = ZEN_UMC_UNKNOWN_FREQ;
3392 		chan->chan_speed[i] = ZEN_UMC_UNKNOWN_FREQ;
3393 
3394 		reg = UMC_DRAMCFG(id, i);
3395 		if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3396 			dev_err(umc->umc_dip, CE_WARN, "failed to read DRAM "
3397 			    "Configuration register P-state %u %x: %d", i,
3398 			    SMN_REG_ADDR(reg), ret);
3399 			return (B_FALSE);
3400 		}
3401 		chan->chan_dramcfg_raw[i] = val;
3402 
3403 		zen_umc_fill_chan_freq(umc, chan, i);
3404 	}
3405 
3406 	/*
3407 	 * Grab data that we can use to determine if we're scrambling or
3408 	 * encrypting regions of memory.
3409 	 */
3410 	reg = UMC_DATACTL(id);
3411 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3412 		dev_err(umc->umc_dip, CE_WARN, "failed to read data control "
3413 		    "register %x: %d", SMN_REG_ADDR(reg), ret);
3414 		return (B_FALSE);
3415 	}
3416 	chan->chan_datactl_raw = val;
3417 	if (UMC_DATACTL_GET_SCRAM_EN(val)) {
3418 		chan->chan_flags |= UMC_CHAN_F_SCRAMBLE_EN;
3419 	}
3420 
3421 	if (UMC_DATACTL_GET_ENCR_EN(val)) {
3422 		chan->chan_flags |= UMC_CHAN_F_ENCR_EN;
3423 	}
3424 
3425 	/*
3426 	 * At the moment we snapshot the raw ECC control information. When we do
3427 	 * further work of making this a part of the MCA/X decoding, we'll want
3428 	 * to further take this apart for syndrome decoding. Until then, simply
3429 	 * cache it for future us and observability.
3430 	 */
3431 	reg = UMC_ECCCTL(id);
3432 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3433 		dev_err(umc->umc_dip, CE_WARN, "failed to read ECC control "
3434 		    "register %x: %d", SMN_REG_ADDR(reg), ret);
3435 		return (B_FALSE);
3436 	}
3437 	chan->chan_eccctl_raw = val;
3438 
3439 	/*
3440 	 * Read and snapshot the UMC capability registers for debugging in the
3441 	 * future.
3442 	 */
3443 	reg = UMC_UMCCAP(id);
3444 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3445 		dev_err(umc->umc_dip, CE_WARN, "failed to read UMC cap "
3446 		    "register %x: %d", SMN_REG_ADDR(reg), ret);
3447 		return (B_FALSE);
3448 	}
3449 	chan->chan_umccap_raw = val;
3450 
3451 	reg = UMC_UMCCAP_HI(id);
3452 	if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) {
3453 		dev_err(umc->umc_dip, CE_WARN, "failed to read UMC cap high "
3454 		    "register %x: %d", SMN_REG_ADDR(reg), ret);
3455 		return (B_FALSE);
3456 	}
3457 	chan->chan_umccap_hi_raw = val;
3458 
3459 	return (B_TRUE);
3460 }
3461 
3462 static int
3463 zen_umc_fill_umc_cb(const uint_t dfno, const uint32_t fabid,
3464     const uint32_t instid, void *arg)
3465 {
3466 	zen_umc_t *umc = arg;
3467 	zen_umc_df_t *df = &umc->umc_dfs[dfno];
3468 	zen_umc_chan_t *chan = &df->zud_chan[df->zud_nchan];
3469 
3470 	df->zud_nchan++;
3471 	VERIFY3U(df->zud_nchan, <=, ZEN_UMC_MAX_UMCS);
3472 
3473 	/*
3474 	 * The data fabric is generally organized such that all UMC entries
3475 	 * should be contiguous in their fabric ID space; however, we don't
3476 	 * want to rely on specific ID locations. The UMC SMN addresses are
3477 	 * organized in a relative order. To determine the SMN ID to use (the
3478 	 * chan_logid) we end up making the following assumptions:
3479 	 *
3480 	 *  o The iteration order will always be from the lowest component ID
3481 	 *    to the highest component ID.
3482 	 *  o The relative order that we encounter will be the same as the SMN
3483 	 *    order. That is, the first thing we find (regardless of component
3484 	 *    ID) will be SMN UMC entry 0, the next 1, etc.
3485 	 */
3486 	chan->chan_logid = df->zud_nchan - 1;
3487 	chan->chan_fabid = fabid;
3488 	chan->chan_instid = instid;
3489 	chan->chan_nrules = umc->umc_fdata->zufd_cs_nrules;
3490 	for (uint_t i = 0; i < umc->umc_fdata->zufd_cs_nrules; i++) {
3491 		if (zen_umc_read_dram_rule(umc, dfno, instid, i,
3492 		    &chan->chan_rules[i]) != 0) {
3493 			return (-1);
3494 		}
3495 	}
3496 
3497 	for (uint_t i = 0; i < umc->umc_fdata->zufd_cs_nrules - 1; i++) {
3498 		int ret;
3499 		uint32_t offset;
3500 		uint64_t t;
3501 		df_reg_def_t off_reg;
3502 		chan_offset_t *offp = &chan->chan_offsets[i];
3503 
3504 		switch (umc->umc_df_rev) {
3505 		case DF_REV_2:
3506 		case DF_REV_3:
3507 		case DF_REV_3P5:
3508 			ASSERT3U(i, ==, 0);
3509 			off_reg = DF_DRAM_OFFSET_V2;
3510 			break;
3511 		case DF_REV_4:
3512 		case DF_REV_4D2:
3513 			off_reg = DF_DRAM_OFFSET_V4(i);
3514 			break;
3515 		default:
3516 			dev_err(umc->umc_dip, CE_WARN, "!encountered "
3517 			    "unsupported DF revision processing DRAM Offsets: "
3518 			    "0x%x", umc->umc_df_rev);
3519 			return (-1);
3520 		}
3521 
3522 		if ((ret = amdzen_c_df_read32(dfno, instid, off_reg,
3523 		    &offset)) != 0) {
3524 			dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM "
3525 			    "offset %u on 0x%x/0x%x: %d", i, dfno, instid, ret);
3526 			return (-1);
3527 		}
3528 
3529 		offp->cho_raw = offset;
3530 		offp->cho_valid = DF_DRAM_OFFSET_GET_EN(offset);
3531 
3532 		switch (umc->umc_df_rev) {
3533 		case DF_REV_2:
3534 			t = DF_DRAM_OFFSET_V2_GET_OFFSET(offset);
3535 			break;
3536 		case DF_REV_3:
3537 		case DF_REV_3P5:
3538 			t = DF_DRAM_OFFSET_V3_GET_OFFSET(offset);
3539 			break;
3540 		case DF_REV_4:
3541 		case DF_REV_4D2:
3542 			t = DF_DRAM_OFFSET_V4_GET_OFFSET(offset);
3543 			break;
3544 		default:
3545 			dev_err(umc->umc_dip, CE_WARN, "!encountered "
3546 			    "unsupported DF revision processing DRAM Offsets: "
3547 			    "0x%x", umc->umc_df_rev);
3548 			return (-1);
3549 		}
3550 		offp->cho_offset = t << DF_DRAM_OFFSET_SHIFT;
3551 	}
3552 
3553 	/*
3554 	 * If this platform supports our favorite Zen 3 6-channel hash special
3555 	 * then we need to grab the NP2 configuration registers. This will only
3556 	 * be referenced if this channel is actually being used for a 6-channel
3557 	 * hash, so even if the contents are weird that should still be ok.
3558 	 */
3559 	if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_NP2) != 0) {
3560 		uint32_t np2;
3561 		int ret;
3562 
3563 		if ((ret = amdzen_c_df_read32(dfno, instid, DF_NP2_CONFIG_V3,
3564 		    &np2)) != 0) {
3565 			dev_err(umc->umc_dip, CE_WARN, "!failed to read NP2 "
3566 			    "config: %d", ret);
3567 			return (-1);
3568 		}
3569 
3570 		chan->chan_np2_raw = np2;
3571 		chan->chan_np2_space0 = DF_NP2_CONFIG_V3_GET_SPACE0(np2);
3572 	}
3573 
3574 	/*
3575 	 * Now that we have everything we need from the data fabric, read out
3576 	 * the rest of what we need from the UMC channel data in SMN register
3577 	 * space.
3578 	 */
3579 	switch (umc->umc_fdata->zufd_umc_style) {
3580 	case ZEN_UMC_UMC_S_DDR4:
3581 	case ZEN_UMC_UMC_S_DDR4_APU:
3582 		for (uint_t i = 0; i < ZEN_UMC_MAX_DIMMS; i++) {
3583 			if (!zen_umc_fill_chan_dimm_ddr4(umc, df, chan, i)) {
3584 				return (-1);
3585 			}
3586 		}
3587 		break;
3588 	case ZEN_UMC_UMC_S_HYBRID_LPDDR5:
3589 	case ZEN_UMC_UMC_S_DDR5:
3590 	case ZEN_UMC_UMC_S_DDR5_APU:
3591 		for (uint_t i = 0; i < ZEN_UMC_MAX_DIMMS; i++) {
3592 			for (uint_t r = 0; r < ZEN_UMC_MAX_CS_PER_DIMM; r++) {
3593 				if (!zen_umc_fill_chan_rank_ddr5(umc, df, chan,
3594 				    i, r)) {
3595 					return (-1);
3596 				}
3597 			}
3598 		}
3599 		break;
3600 	default:
3601 		dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported "
3602 		    "UMC style: 0x%x", umc->umc_fdata->zufd_umc_style);
3603 		return (-1);
3604 	}
3605 
3606 	if (!zen_umc_fill_chan(umc, df, chan)) {
3607 		return (-1);
3608 	}
3609 
3610 	return (0);
3611 }
3612 
3613 /*
3614  * Today there are no privileges for the memory controller information; it is
3615  * restricted based on file system permissions.
3616  */
3617 static int
3618 zen_umc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
3619 {
3620 	zen_umc_t *umc = zen_umc;
3621 
3622 	if ((flag & (FEXCL | FNDELAY | FNONBLOCK | FWRITE)) != 0) {
3623 		return (EINVAL);
3624 	}
3625 
3626 	if (otyp != OTYP_CHR) {
3627 		return (EINVAL);
3628 	}
3629 
3630 	if (getminor(*devp) >= umc->umc_ndfs) {
3631 		return (ENXIO);
3632 	}
3633 
3634 	return (0);
3635 }
3636 
3637 static void
3638 zen_umc_ioctl_decode(zen_umc_t *umc, mc_encode_ioc_t *encode)
3639 {
3640 	zen_umc_decoder_t dec;
3641 	uint32_t sock, die, comp;
3642 
3643 	bzero(&dec, sizeof (dec));
3644 	if (!zen_umc_decode_pa(umc, encode->mcei_pa, &dec)) {
3645 		encode->mcei_err = (uint32_t)dec.dec_fail;
3646 		encode->mcei_errdata = dec.dec_fail_data;
3647 		return;
3648 	}
3649 
3650 	encode->mcei_errdata = 0;
3651 	encode->mcei_err = 0;
3652 	encode->mcei_chan_addr = dec.dec_norm_addr;
3653 	encode->mcei_rank_addr = UINT64_MAX;
3654 	encode->mcei_board = 0;
3655 	zen_fabric_id_decompose(&umc->umc_decomp, dec.dec_targ_fabid, &sock,
3656 	    &die, &comp);
3657 	encode->mcei_chip = sock;
3658 	encode->mcei_die = die;
3659 	encode->mcei_mc = dec.dec_umc_chan->chan_logid;
3660 	encode->mcei_chan = 0;
3661 	encode->mcei_dimm = dec.dec_dimm_no;
3662 	encode->mcei_row = dec.dec_dimm_row;
3663 	encode->mcei_column = dec.dec_dimm_col;
3664 	/*
3665 	 * We don't have a logical rank that something matches to; we have the
3666 	 * actual chip-select and rank multiplication. If we could figure out
3667 	 * how to transform that into an actual rank, that'd be grand.
3668 	 */
3669 	encode->mcei_rank = UINT8_MAX;
3670 	encode->mcei_cs = dec.dec_dimm_csno;
3671 	encode->mcei_rm = dec.dec_dimm_rm;
3672 	encode->mcei_bank = dec.dec_dimm_bank;
3673 	encode->mcei_bank_group = dec.dec_dimm_bank_group;
3674 	encode->mcei_subchan = dec.dec_dimm_subchan;
3675 }
3676 
3677 static void
3678 umc_decoder_pack(zen_umc_t *umc)
3679 {
3680 	char *buf = NULL;
3681 	size_t len = 0;
3682 
3683 	ASSERT(MUTEX_HELD(&umc->umc_nvl_lock));
3684 	if (umc->umc_decoder_buf != NULL) {
3685 		return;
3686 	}
3687 
3688 	if (umc->umc_decoder_nvl == NULL) {
3689 		umc->umc_decoder_nvl = zen_umc_dump_decoder(umc);
3690 		if (umc->umc_decoder_nvl == NULL) {
3691 			return;
3692 		}
3693 	}
3694 
3695 	if (nvlist_pack(umc->umc_decoder_nvl, &buf, &len, NV_ENCODE_XDR,
3696 	    KM_NOSLEEP_LAZY) != 0) {
3697 		return;
3698 	}
3699 
3700 	umc->umc_decoder_buf = buf;
3701 	umc->umc_decoder_len = len;
3702 }
3703 
3704 static int
3705 zen_umc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
3706     int *rvalp)
3707 {
3708 	int ret;
3709 	zen_umc_t *umc = zen_umc;
3710 	mc_encode_ioc_t encode;
3711 	mc_snapshot_info_t info;
3712 
3713 	if (getminor(dev) >= umc->umc_ndfs) {
3714 		return (ENXIO);
3715 	}
3716 
3717 	switch (cmd) {
3718 	case MC_IOC_DECODE_PA:
3719 		if (crgetzoneid(credp) != GLOBAL_ZONEID ||
3720 		    drv_priv(credp) != 0) {
3721 			ret = EPERM;
3722 			break;
3723 		}
3724 
3725 		if (ddi_copyin((void *)arg, &encode, sizeof (encode),
3726 		    mode & FKIOCTL) != 0) {
3727 			ret = EFAULT;
3728 			break;
3729 		}
3730 
3731 		zen_umc_ioctl_decode(umc, &encode);
3732 		ret = 0;
3733 
3734 		if (ddi_copyout(&encode, (void *)arg, sizeof (encode),
3735 		    mode & FKIOCTL) != 0) {
3736 			ret = EFAULT;
3737 			break;
3738 		}
3739 		break;
3740 	case MC_IOC_DECODE_SNAPSHOT_INFO:
3741 		mutex_enter(&umc->umc_nvl_lock);
3742 		umc_decoder_pack(umc);
3743 
3744 		if (umc->umc_decoder_buf == NULL) {
3745 			mutex_exit(&umc->umc_nvl_lock);
3746 			ret = EIO;
3747 			break;
3748 		}
3749 
3750 		if (umc->umc_decoder_len > UINT32_MAX) {
3751 			mutex_exit(&umc->umc_nvl_lock);
3752 			ret = EOVERFLOW;
3753 			break;
3754 		}
3755 
3756 		info.mcs_size = umc->umc_decoder_len;
3757 		info.mcs_gen = 0;
3758 		if (ddi_copyout(&info, (void *)arg, sizeof (info),
3759 		    mode & FKIOCTL) != 0) {
3760 			mutex_exit(&umc->umc_nvl_lock);
3761 			ret = EFAULT;
3762 			break;
3763 		}
3764 
3765 		mutex_exit(&umc->umc_nvl_lock);
3766 		ret = 0;
3767 		break;
3768 	case MC_IOC_DECODE_SNAPSHOT:
3769 		mutex_enter(&umc->umc_nvl_lock);
3770 		umc_decoder_pack(umc);
3771 
3772 		if (umc->umc_decoder_buf == NULL) {
3773 			mutex_exit(&umc->umc_nvl_lock);
3774 			ret = EIO;
3775 			break;
3776 		}
3777 
3778 		if (ddi_copyout(umc->umc_decoder_buf, (void *)arg,
3779 		    umc->umc_decoder_len, mode & FKIOCTL) != 0) {
3780 			mutex_exit(&umc->umc_nvl_lock);
3781 			ret = EFAULT;
3782 			break;
3783 		}
3784 
3785 		mutex_exit(&umc->umc_nvl_lock);
3786 		ret = 0;
3787 		break;
3788 	default:
3789 		ret = ENOTTY;
3790 		break;
3791 	}
3792 
3793 	return (ret);
3794 }
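
/*
 * A minimal sketch of how a userland consumer might drive the snapshot ioctls
 * above (error handling elided; the device path shown is an assumption based
 * on the "mc-umc-%u" minor nodes created during attach):
 *
 *	int fd = open("/dev/mc/mc-umc-0", O_RDONLY);
 *	mc_snapshot_info_t info;
 *	(void) ioctl(fd, MC_IOC_DECODE_SNAPSHOT_INFO, &info);
 *	char *buf = malloc(info.mcs_size);
 *	(void) ioctl(fd, MC_IOC_DECODE_SNAPSHOT, buf);
 *	nvlist_t *nvl;
 *	(void) nvlist_unpack(buf, info.mcs_size, &nvl, 0);
 *
 * This mirrors how the serialized decoder state can be handed to tools like
 * mcdecode for offline use.
 */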
3795 
3796 static int
3797 zen_umc_close(dev_t dev, int flag, int otyp, cred_t *credp)
3798 {
3799 	return (0);
3800 }
3801 
3802 static void
3803 zen_umc_cleanup(zen_umc_t *umc)
3804 {
3805 	nvlist_free(umc->umc_decoder_nvl);
3806 	umc->umc_decoder_nvl = NULL;
3807 	if (umc->umc_decoder_buf != NULL) {
3808 		kmem_free(umc->umc_decoder_buf, umc->umc_decoder_len);
3809 		umc->umc_decoder_buf = NULL;
3810 		umc->umc_decoder_len = 0;
3811 	}
3812 
3813 	if (umc->umc_dip != NULL) {
3814 		ddi_remove_minor_node(umc->umc_dip, NULL);
3815 	}
3816 	mutex_destroy(&umc->umc_nvl_lock);
3817 	kmem_free(umc, sizeof (zen_umc_t));
3818 }
3819 
3820 static int
3821 zen_umc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
3822 {
3823 	int ret;
3824 	zen_umc_t *umc;
3825 
3826 	if (cmd == DDI_RESUME) {
3827 		return (DDI_SUCCESS);
3828 	} else if (cmd != DDI_ATTACH) {
3829 		return (DDI_FAILURE);
3830 	}
3831 	if (zen_umc != NULL) {
3832 		dev_err(dip, CE_WARN, "!zen_umc is already attached to a "
3833 		    "dev_info_t: %p", zen_umc->umc_dip);
3834 		return (DDI_FAILURE);
3835 	}
3836 
3837 	/*
3838 	 * To get us going, we need to do several bits of set up. First, we need
3839 	 * to use the knowledge about the actual hardware that we're using to
3840 	 * encode a bunch of different data:
3841 	 *
3842 	 *  o The set of register styles and extra hardware features that exist
3843 	 *    on the hardware platform.
3844 	 *  o The number of actual rules there are for the CCMs and UMCs.
3845 	 *  o How many actual things exist (DFs, etc.)
3846 	 *  o Useful fabric and instance IDs for all of the different UMC
3847 	 *    entries so we can actually talk to them.
3848 	 *
3849 	 * Only once we have all the above will we go dig into the actual data.
3850 	 */
3851 	umc = kmem_zalloc(sizeof (zen_umc_t), KM_SLEEP);
3852 	mutex_init(&umc->umc_nvl_lock, NULL, MUTEX_DRIVER, NULL);
3853 	umc->umc_family = chiprev_family(cpuid_getchiprev(CPU));
3854 	umc->umc_ndfs = amdzen_c_df_count();
3855 	umc->umc_dip = dip;
3856 
3857 	if (!zen_umc_identify(umc)) {
3858 		dev_err(dip, CE_WARN, "!encountered unsupported CPU");
3859 		goto err;
3860 	}
3861 
3862 	umc->umc_df_rev = amdzen_c_df_rev();
3863 	switch (umc->umc_df_rev) {
3864 	case DF_REV_2:
3865 	case DF_REV_3:
3866 	case DF_REV_3P5:
3867 	case DF_REV_4:
3868 	case DF_REV_4D2:
3869 		break;
3870 	default:
3871 		dev_err(dip, CE_WARN, "!encountered unknown DF revision: %x",
3872 		    umc->umc_df_rev);
3873 		goto err;
3874 	}
3875 
3876 	if ((ret = amdzen_c_df_fabric_decomp(&umc->umc_decomp)) != 0) {
3877 		dev_err(dip, CE_WARN, "!failed to get fabric decomposition: %d",
3878 		    ret);
		goto err;
3879 	}
3880 
3881 	umc->umc_tom = rdmsr(MSR_AMD_TOM);
3882 	umc->umc_tom2 = rdmsr(MSR_AMD_TOM2);
3883 
3884 	/*
3885 	 * For each DF, start by reading all of the data that we need from it.
3886 	 * This involves finding a target CCM, reading all of the rules,
3887 	 * ancillary settings, and related. Then we'll do a pass over all of the
3888 	 * actual UMC targets there.
3889 	 */
3890 	for (uint_t i = 0; i < umc->umc_ndfs; i++) {
3891 		if (amdzen_c_df_iter(i, ZEN_DF_TYPE_CCM_CPU,
3892 		    zen_umc_fill_ccm_cb, umc) < 0 ||
3893 		    amdzen_c_df_iter(i, ZEN_DF_TYPE_CS_UMC, zen_umc_fill_umc_cb,
3894 		    umc) != 0) {
3895 			goto err;
3896 		}
3897 	}
3898 
3899 	/*
3900 	 * Create a minor node for each df that we encounter.
3901 	 */
3902 	for (uint_t i = 0; i < umc->umc_ndfs; i++) {
3903 		int ret;
3904 		char minor[64];
3905 
3906 		(void) snprintf(minor, sizeof (minor), "mc-umc-%u", i);
3907 		if ((ret = ddi_create_minor_node(umc->umc_dip, minor, S_IFCHR,
3908 		    i, "ddi_mem_ctrl", 0)) != 0) {
3909 			dev_err(dip, CE_WARN, "!failed to create minor %s: %d",
3910 			    minor, ret);
3911 			goto err;
3912 		}
3913 	}
3914 
3915 	zen_umc = umc;
3916 	return (DDI_SUCCESS);
3917 
3918 err:
3919 	zen_umc_cleanup(umc);
3920 	return (DDI_FAILURE);
3921 }
3922 
3923 static int
3924 zen_umc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
3925 {
3926 	zen_umc_t *umc;
3927 
3928 	if (zen_umc == NULL || zen_umc->umc_dip == NULL) {
3929 		return (DDI_FAILURE);
3930 	}
3931 	umc = zen_umc;
3932 
3933 	switch (cmd) {
3934 	case DDI_INFO_DEVT2DEVINFO:
3935 		*resultp = (void *)umc->umc_dip;
3936 		break;
3937 	case DDI_INFO_DEVT2INSTANCE:
3938 		*resultp = (void *)(uintptr_t)ddi_get_instance(
3939 		    umc->umc_dip);
3940 		break;
3941 	default:
3942 		return (DDI_FAILURE);
3943 	}
3944 	return (DDI_SUCCESS);
3945 }
3946 
3947 static int
3948 zen_umc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
3949 {
3950 	zen_umc_t *umc;
3951 
3952 	if (cmd == DDI_SUSPEND) {
3953 		return (DDI_SUCCESS);
3954 	} else if (cmd != DDI_DETACH) {
3955 		return (DDI_FAILURE);
3956 	}
3957 
3958 	if (zen_umc == NULL) {
3959 		dev_err(dip, CE_WARN, "!asked to detach zen_umc, but it "
3960 		    "was never successfully attached");
3961 		return (DDI_FAILURE);
3962 	}
3963 
3964 	umc = zen_umc;
3965 	zen_umc = NULL;
3966 	zen_umc_cleanup(umc);
3967 	return (DDI_SUCCESS);
3968 }
3969 
3970 static struct cb_ops zen_umc_cb_ops = {
3971 	.cb_open = zen_umc_open,
3972 	.cb_close = zen_umc_close,
3973 	.cb_strategy = nodev,
3974 	.cb_print = nodev,
3975 	.cb_dump = nodev,
3976 	.cb_read = nodev,
3977 	.cb_write = nodev,
3978 	.cb_ioctl = zen_umc_ioctl,
3979 	.cb_devmap = nodev,
3980 	.cb_mmap = nodev,
3981 	.cb_segmap = nodev,
3982 	.cb_chpoll = nochpoll,
3983 	.cb_prop_op = ddi_prop_op,
3984 	.cb_flag = D_MP,
3985 	.cb_rev = CB_REV,
3986 	.cb_aread = nodev,
3987 	.cb_awrite = nodev
3988 };
3989 
3990 static struct dev_ops zen_umc_dev_ops = {
3991 	.devo_rev = DEVO_REV,
3992 	.devo_refcnt = 0,
3993 	.devo_getinfo = zen_umc_getinfo,
3994 	.devo_identify = nulldev,
3995 	.devo_probe = nulldev,
3996 	.devo_attach = zen_umc_attach,
3997 	.devo_detach = zen_umc_detach,
3998 	.devo_reset = nodev,
3999 	.devo_quiesce = ddi_quiesce_not_needed,
4000 	.devo_cb_ops = &zen_umc_cb_ops
4001 };
4002 
4003 static struct modldrv zen_umc_modldrv = {
4004 	.drv_modops = &mod_driverops,
4005 	.drv_linkinfo = "AMD Zen Unified Memory Controller",
4006 	.drv_dev_ops = &zen_umc_dev_ops
4007 };
4008 
4009 static struct modlinkage zen_umc_modlinkage = {
4010 	.ml_rev = MODREV_1,
4011 	.ml_linkage = { &zen_umc_modldrv, NULL }
4012 };
4013 
4014 int
4015 _init(void)
4016 {
4017 	return (mod_install(&zen_umc_modlinkage));
4018 }
4019 
4020 int
4021 _info(struct modinfo *modinfop)
4022 {
4023 	return (mod_info(&zen_umc_modlinkage, modinfop));
4024 }
4025 
4026 int
4027 _fini(void)
4028 {
4029 	return (mod_remove(&zen_umc_modlinkage));
4030 }
4031