xref: /illumos-gate/usr/src/common/mc/zen_umc/zen_umc_decode.c (revision 05ce3950cb6a645887911ba82ec91e3c06c5ad7c)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2024 Oxide Computer Company
14  */
15 
16 /*
17  * Zen UMC Decoding logic. See zen_umc.c for an overview of everything. This
18  * implements shared userland/kernel decoding.
19  */
20 
21 #include "zen_umc.h"
22 
23 #ifndef _KERNEL
24 #include <strings.h>
25 #endif
26 
27 /*
28  * Address constants.
29  */
30 #define	ZEN_UMC_TOM2_START	0x100000000ULL
31 #define	ZEN_UMC_TOM2_RSVD_BEGIN	0xfd00000000ULL
32 #define	ZEN_UMC_TOM2_RSVD_END	0x10000000000ULL
33 
34 /*
35  * COD based hashing constants.
36  */
37 #define	ZEN_UMC_COD_NBITS	3
38 #define	ZEN_UMC_NPS_MOD_NBITS	3
39 
/*
 * Enumeration that represents which parts of the NPS 1K/2K non-power of 2 hash
 * we should use. These are ordered such that their indexes correspond with the
 * 'hashes' array indexes used in zen_umc_decode_ileave_nps_k_mod(). Each
 * member is named for the physical address bit that seeds that hash output.
 */
typedef enum {
	ZEN_UMC_NP2_K_HASH_8 = 0,
	ZEN_UMC_NP2_K_HASH_9,
	ZEN_UMC_NP2_K_HASH_12,
	ZEN_UMC_NP2_K_HASH_13
} zen_umc_np2_k_hash_t;
51 
typedef struct {
	/*
	 * Indicates what the type of this rule is.
	 */
	df_chan_ileave_t zukr_type;
	/*
	 * This is the modulus that this rule uses.
	 */
	uint32_t zukr_mod;
	/*
	 * Indicates that this rule requires socket interleaving. Otherwise we
	 * expect no socket interleaving to be enabled.
	 */
	boolean_t zukr_sock;
	/*
	 * This is the 'high' portion of the original address that is used as
	 * part of the division and modulus logic when we take it. This bit is
	 * inclusive, e.g. a value of 12 indicates we want addr[64:12].
	 */
	uint32_t zukr_high;
	/*
	 * This indicates at what point in the modulus address the high bits
	 * should arrive at.
	 */
	uint32_t zukr_mod_shift;
	/*
	 * This indicates how we should fill the remaining bits in the modulus
	 * address. This is either zero filled or an original address bit. Only
	 * address bits 8 or 9 are ever used so we cheat and treat a zero here
	 * as zero filled. Only the first zukr_mod_shift bits will be
	 * considered. This and zukr_mod_shift are used prior to the modulus
	 * calculation.
	 */
	uint32_t zukr_mod_fill[5];
	/*
	 * The next series of values defines how to construct the channel. The
	 * channel is always made up of some number of bits from the modulus
	 * value and then optionally some of the hash bits. The first value
	 * indicates how many bits to shift the resulting modulus value by. Any
	 * bit that it is shifted over by must be filled by a hashed value.
	 * Which hash bit to use is indicated by its starting address bit
	 * number (zen_umc_np2_k_hash_t).
	 */
	uint32_t zukr_chan_mod_shift;
	zen_umc_np2_k_hash_t zukr_chan_fill[2];
	/*
	 * Next, it's time to describe how to construct the normalized address.
	 * There is a portion of it which is divided by the modulus. This is
	 * always going to be the high bits, but sometimes includes additional
	 * lower parts of the physical address ORed in. zukr_div_addr is the
	 * starting address bit; zukr_div_naddr is the number of consecutive
	 * address bits to include from there.
	 */
	uint32_t zukr_div_addr;
	uint32_t zukr_div_naddr;
	/*
	 * Finally the middle portion of the normalized address, again as a
	 * starting bit and a consecutive bit count.
	 */
	uint32_t zukr_norm_addr;
	uint32_t zukr_norm_naddr;
} zen_umc_np2_k_rule_t;
113 
114 const zen_umc_np2_k_rule_t zen_umc_np2_k_rules[] = { {
115 	.zukr_type = DF_CHAN_ILEAVE_NPS4_3CH_1K,
116 	.zukr_mod = 3,
117 	.zukr_high = 12,
118 	.zukr_mod_shift = 2,
119 	.zukr_mod_fill = { 8, 9 },
120 	.zukr_chan_mod_shift = 0,
121 	.zukr_div_addr = 8,
122 	.zukr_div_naddr = 2,
123 	.zukr_norm_addr = 10,
124 	.zukr_norm_naddr = 2
125 }, {
126 	.zukr_type = DF_CHAN_ILEAVE_NPS4_3CH_2K,
127 	.zukr_mod = 3,
128 	.zukr_high = 12,
129 	.zukr_mod_shift = 2,
130 	.zukr_mod_fill = { 0, 8 },
131 	.zukr_chan_mod_shift = 0,
132 	.zukr_div_addr = 8,
133 	.zukr_div_naddr = 1,
134 	.zukr_norm_addr = 9,
135 	.zukr_norm_naddr = 3
136 }, {
137 	.zukr_type = DF_CHAN_ILEAVE_NPS2_6CH_1K,
138 	.zukr_mod = 3,
139 	.zukr_high = 12,
140 	.zukr_mod_shift = 2,
141 	.zukr_mod_fill = { 0, 9 },
142 	.zukr_chan_mod_shift = 1,
143 	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8 },
144 	.zukr_div_addr = 9,
145 	.zukr_div_naddr = 1,
146 	.zukr_norm_addr = 10,
147 	.zukr_norm_naddr = 2
148 }, {
149 	.zukr_type = DF_CHAN_ILEAVE_NPS2_6CH_2K,
150 	.zukr_mod = 3,
151 	.zukr_high = 12,
152 	.zukr_mod_shift = 2,
153 	.zukr_mod_fill = { 0, 0 },
154 	.zukr_chan_mod_shift = 1,
155 	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8 },
156 	.zukr_div_naddr = 0,
157 	.zukr_norm_addr = 9,
158 	.zukr_norm_naddr = 3
159 }, {
160 	.zukr_type = DF_CHAN_ILEAVE_NPS1_12CH_1K,
161 	.zukr_mod = 3,
162 	.zukr_high = 12,
163 	.zukr_mod_shift = 2,
164 	.zukr_mod_fill = { 0, 0 },
165 	.zukr_chan_mod_shift = 2,
166 	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8, ZEN_UMC_NP2_K_HASH_9 },
167 	.zukr_div_naddr = 0,
168 	.zukr_norm_addr = 10,
169 	.zukr_norm_naddr = 2
170 }, {
171 	.zukr_type = DF_CHAN_ILEAVE_NPS1_12CH_1K,
172 	.zukr_mod = 3,
173 	.zukr_high = 12,
174 	.zukr_mod_shift = 2,
175 	.zukr_mod_fill = { 0, 0 },
176 	.zukr_chan_mod_shift = 2,
177 	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8, ZEN_UMC_NP2_K_HASH_9 },
178 	.zukr_div_naddr = 0,
179 	.zukr_norm_addr = 10,
180 	.zukr_norm_naddr = 2
181 }, {
182 	.zukr_type = DF_CHAN_ILEAVE_NPS1_12CH_2K,
183 	.zukr_mod = 3,
184 	.zukr_high = 13,
185 	.zukr_mod_shift = 3,
186 	.zukr_mod_fill = { 0, 0, 0 },
187 	.zukr_chan_mod_shift = 2,
188 	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8, ZEN_UMC_NP2_K_HASH_12 },
189 	.zukr_div_naddr = 0,
190 	.zukr_norm_addr = 9,
191 	.zukr_norm_naddr = 3
192 }, {
193 	.zukr_type = DF_CHAN_ILEAVE_NPS0_24CH_1K,
194 	.zukr_mod = 3,
195 	.zukr_sock = B_TRUE,
196 	.zukr_high = 13,
197 	.zukr_mod_shift = 3,
198 	.zukr_mod_fill = { 0, 0, 0 },
199 	.zukr_chan_mod_shift = 2,
200 	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_9, ZEN_UMC_NP2_K_HASH_12 },
201 	.zukr_div_naddr = 0,
202 	.zukr_norm_addr = 10,
203 	.zukr_norm_naddr = 2
204 }, {
205 	.zukr_type = DF_CHAN_ILEAVE_NPS0_24CH_2K,
206 	.zukr_mod = 3,
207 	.zukr_sock = B_TRUE,
208 	.zukr_high = 14,
209 	.zukr_mod_shift = 4,
210 	.zukr_mod_fill = { 0, 0, 0, 0 },
211 	.zukr_chan_mod_shift = 2,
212 	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_12, ZEN_UMC_NP2_K_HASH_13 },
213 	.zukr_div_naddr = 0,
214 	.zukr_norm_addr = 9,
215 	.zukr_norm_naddr = 3
216 }, {
217 	.zukr_type = DF_CHAN_ILEAVE_NPS2_5CH_1K,
218 	.zukr_mod = 5,
219 	.zukr_high = 12,
220 	.zukr_mod_shift = 2,
221 	.zukr_mod_fill = { 8, 9 },
222 	.zukr_chan_mod_shift = 0,
223 	.zukr_div_addr = 8,
224 	.zukr_div_naddr = 2,
225 	.zukr_norm_addr = 10,
226 	.zukr_norm_naddr = 2
227 }, {
228 	.zukr_type = DF_CHAN_ILEAVE_NPS2_5CH_2K,
229 	.zukr_mod = 5,
230 	.zukr_high = 12,
231 	.zukr_mod_shift = 2,
232 	.zukr_mod_fill = { 0, 8 },
233 	.zukr_chan_mod_shift = 0,
234 	.zukr_div_addr = 8,
235 	.zukr_div_naddr = 1,
236 	.zukr_norm_addr = 9,
237 	.zukr_norm_naddr = 3
238 }, {
239 	.zukr_type = DF_CHAN_ILEAVE_NPS1_10CH_1K,
240 	.zukr_mod = 5,
241 	.zukr_high = 12,
242 	.zukr_mod_shift = 2,
243 	.zukr_mod_fill = { 0, 9 },
244 	.zukr_chan_mod_shift = 1,
245 	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8 },
246 	.zukr_div_addr = 9,
247 	.zukr_div_naddr = 1,
248 	.zukr_norm_addr = 10,
249 	.zukr_norm_naddr = 2
250 }, {
251 	.zukr_type = DF_CHAN_ILEAVE_NPS1_10CH_2K,
252 	.zukr_mod = 5,
253 	.zukr_high = 12,
254 	.zukr_mod_shift = 2,
255 	.zukr_mod_fill = { 0, 0 },
256 	.zukr_chan_mod_shift = 1,
257 	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8 },
258 	.zukr_div_naddr = 0,
259 	.zukr_norm_addr = 9,
260 	.zukr_norm_naddr = 3
261 } };
262 
263 /*
264  * We want to apply some initial heuristics to determine if a physical address
265  * is DRAM before we proceed because of the MMIO hole and related. The DRAM
266  * ranges can overlap with these system reserved ranges so we have to manually
267  * check these.  Effectively this means that we have a few valid ranges:
268  *
269  *  o [ 0, TOM )
270  *  o [ 4 GiB, TOM2 )
271  *
272  * However, the above 4 GiB runs into trouble depending on size. There is a 12
273  * GiB system reserved address region right below 1 TiB. So it really turns
274  * into the following when we have more than 1 TiB of DRAM:
275  *
276  *  o [ 0, TOM )
277  *  o [ 4 GiB, 1 TiB - 12 GiB )
278  *  o [ 1 TiB, TOM2 )
279  *
280  * Note, this does not currently scan MTRRs or MMIO rules for what might be
281  * redirected to MMIO.
282  */
283 static boolean_t
zen_umc_decode_is_dram(const zen_umc_t * umc,zen_umc_decoder_t * dec)284 zen_umc_decode_is_dram(const zen_umc_t *umc, zen_umc_decoder_t *dec)
285 {
286 	if (dec->dec_pa < umc->umc_tom) {
287 		return (B_TRUE);
288 	}
289 
290 	if (dec->dec_pa >= umc->umc_tom2) {
291 		dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM;
292 		return (B_FALSE);
293 	}
294 
295 	/*
296 	 * If the address is in the reserved hole around 1 TiB, do not proceed.
297 	 */
298 	if (dec->dec_pa >= ZEN_UMC_TOM2_RSVD_BEGIN &&
299 	    dec->dec_pa < ZEN_UMC_TOM2_RSVD_END) {
300 		dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM;
301 		return (B_FALSE);
302 	}
303 
304 	/*
305 	 * Now that we've validated we're not in the hole, check to see if we're
306 	 * actually in a valid region for TOM2.
307 	 */
308 	if (dec->dec_pa >= ZEN_UMC_TOM2_START &&
309 	    dec->dec_pa < umc->umc_tom2) {
310 		return (B_TRUE);
311 	}
312 
313 	/*
314 	 * At this point we have eliminated all known DRAM regions described by
315 	 * TOM and TOM2, so we have to conclude that whatever we're looking at
316 	 * is now not part of DRAM.
317 	 */
318 	dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM;
319 	return (B_FALSE);
320 }
321 
322 /*
323  * In our first stop on decoding, we need to go through and take a physical
324  * address and figure out what the corresponding initial DF rule that applies
325  * is. This rule will then be used to figure out which target on the data fabric
326  * we should be going to and what interleaving rules apply.
327  *
328  * Our DRAM rule may reflect that the DRAM hole is active. In this case the
329  * specified range in the rule will be larger than the actual amount of DRAM
330  * present. MMIO accesses take priority over DRAM accesses in the core and
331  * therefore the MMIO portion of the rule is not actually decoded. When trying
332  * to match a rule we do not need to worry about that and can just look whether
333  * our physical address matches a rule. We will take into account whether
334  * hoisting should adjust the address when we translate from a system address to
335  * a normal address (e.g. an address in the channel) which will be done in a
336  * subsequent step. If an address is in the hole, that has already been
337  * accounted for.
338  *
339  * While gathering information, we have all the DRAM rules for a given CCM that
340  * corresponds to a CPU core. This allows us to review all DRAM rules in one
341  * place rather than walking through what's been assigned to each UMC instance,
342  * which only has the rules that are directed towards that particular channel
343  * and matter for determining channel offsets.
344  */
345 static boolean_t
zen_umc_decode_find_df_rule(const zen_umc_t * umc,zen_umc_decoder_t * dec)346 zen_umc_decode_find_df_rule(const zen_umc_t *umc, zen_umc_decoder_t *dec)
347 {
348 	const zen_umc_df_t *df = &umc->umc_dfs[0];
349 
350 	for (uint_t i = 0; i < df->zud_dram_nrules; i++) {
351 		const df_dram_rule_t *rule = &df->zud_rules[i];
352 
353 		/*
354 		 * If this rule is not enabled, skip it.
355 		 */
356 		if ((rule->ddr_flags & DF_DRAM_F_VALID) == 0)
357 			continue;
358 
359 		if (dec->dec_pa >= rule->ddr_base &&
360 		    dec->dec_pa < rule->ddr_limit) {
361 			dec->dec_df_ruleno = i;
362 			dec->dec_df_rule = rule;
363 			dec->dec_df_rulesrc = df;
364 			return (B_TRUE);
365 		}
366 	}
367 
368 	dec->dec_fail = ZEN_UMC_DECODE_F_NO_DF_RULE;
369 	return (B_FALSE);
370 }
371 
372 /*
373  * This function takes care of the common logic of adjusting an address by the
374  * base value in the rule and determining if we need to apply the DRAM hole or
375  * not. This function is used in two different places:
376  *
377  *   o As part of adjusting the system address to construct the interleave
378  *     address for DFv4 and Zen 3 based 6-channel hashing (see
379  *     zen_umc_determine_ileave_addr() below).
380  *   o As part of adjusting the system address at the beginning of normalization
381  *     to a channel address.
382  *
383  * One thing to highlight is that the same adjustment we make in the first case
384  * applies to a subset of things for interleaving; however, it applies to
385  * everything when normalizing.
386  */
387 static boolean_t
zen_umc_adjust_dram_addr(const zen_umc_t * umc,zen_umc_decoder_t * dec,uint64_t * addrp,zen_umc_decode_failure_t errno)388 zen_umc_adjust_dram_addr(const zen_umc_t *umc, zen_umc_decoder_t *dec,
389     uint64_t *addrp, zen_umc_decode_failure_t errno)
390 {
391 	const uint64_t init_addr = *addrp;
392 	const df_dram_rule_t *rule = dec->dec_df_rule;
393 	const zen_umc_df_t *df = dec->dec_df_rulesrc;
394 	uint64_t mod_addr = init_addr;
395 
396 	ASSERT3U(init_addr, >=, rule->ddr_base);
397 	ASSERT3U(init_addr, <, rule->ddr_limit);
398 	mod_addr -= rule->ddr_base;
399 
400 	/*
401 	 * Determine if the hole applies to this rule.
402 	 */
403 	if ((rule->ddr_flags & DF_DRAM_F_HOLE) != 0 &&
404 	    (df->zud_flags & ZEN_UMC_DF_F_HOLE_VALID) != 0 &&
405 	    init_addr >= ZEN_UMC_TOM2_START) {
406 		uint64_t hole_size;
407 		hole_size = ZEN_UMC_TOM2_START -
408 		    umc->umc_dfs[0].zud_hole_base;
409 		if (mod_addr < hole_size) {
410 			dec->dec_fail = errno;
411 			dec->dec_fail_data = dec->dec_df_ruleno;
412 			return (B_FALSE);
413 		}
414 
415 		mod_addr -= hole_size;
416 	}
417 
418 	*addrp = mod_addr;
419 	return (B_TRUE);
420 }
421 
422 /*
423  * Take care of constructing the address we need to use for determining the
424  * interleaving target fabric id. See the big theory statement in zen_umc.c for
425  * more on this.
426  */
427 static boolean_t
zen_umc_determine_ileave_addr(const zen_umc_t * umc,zen_umc_decoder_t * dec)428 zen_umc_determine_ileave_addr(const zen_umc_t *umc, zen_umc_decoder_t *dec)
429 {
430 	const df_dram_rule_t *rule = dec->dec_df_rule;
431 
432 	if ((umc->umc_df_rev <= DF_REV_3 &&
433 	    rule->ddr_chan_ileave != DF_CHAN_ILEAVE_6CH) ||
434 	    umc->umc_df_rev >= DF_REV_4D2) {
435 		dec->dec_ilv_pa = dec->dec_pa;
436 		return (B_TRUE);
437 	}
438 
439 	dec->dec_ilv_pa = dec->dec_pa;
440 	if (!zen_umc_adjust_dram_addr(umc, dec, &dec->dec_ilv_pa,
441 	    ZEN_UMC_DECODE_F_ILEAVE_UNDERFLOW)) {
442 		return (B_FALSE);
443 	}
444 
445 	return (B_TRUE);
446 }
447 
448 /*
449  * This is a simple interleaving case where we simply extract bits. No hashing
450  * required! Per zen_umc.c, from lowest to highest, we have channel, die, and
451  * then socket bits.
452  */
453 static boolean_t
zen_umc_decode_ileave_nohash(const zen_umc_t * umc,zen_umc_decoder_t * dec)454 zen_umc_decode_ileave_nohash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
455 {
456 	uint32_t nchan_bit, ndie_bit, nsock_bit, addr_bit;
457 	const df_dram_rule_t *rule = dec->dec_df_rule;
458 
459 	nsock_bit = rule->ddr_sock_ileave_bits;
460 	ndie_bit = rule->ddr_die_ileave_bits;
461 	switch (rule->ddr_chan_ileave) {
462 	case DF_CHAN_ILEAVE_1CH:
463 		nchan_bit = 0;
464 		break;
465 	case DF_CHAN_ILEAVE_2CH:
466 		nchan_bit = 1;
467 		break;
468 	case DF_CHAN_ILEAVE_4CH:
469 		nchan_bit = 2;
470 		break;
471 	case DF_CHAN_ILEAVE_8CH:
472 		nchan_bit = 3;
473 		break;
474 	case DF_CHAN_ILEAVE_16CH:
475 		nchan_bit = 4;
476 		break;
477 	case DF_CHAN_ILEAVE_32CH:
478 		nchan_bit = 5;
479 		break;
480 	default:
481 		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
482 		dec->dec_fail_data = rule->ddr_chan_ileave;
483 		return (B_FALSE);
484 	}
485 
486 	/*
487 	 * Zero all of these out in case no bits are dedicated to this purpose.
488 	 * In those cases, then the value for this is always zero.
489 	 */
490 	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
491 	addr_bit = rule->ddr_addr_start;
492 	if (nchan_bit > 0) {
493 		dec->dec_ilv_chan = bitx64(dec->dec_ilv_pa,
494 		    addr_bit + nchan_bit - 1, addr_bit);
495 		addr_bit += nchan_bit;
496 	}
497 
498 	if (ndie_bit > 0) {
499 		dec->dec_ilv_die = bitx64(dec->dec_ilv_pa,
500 		    addr_bit + ndie_bit - 1, addr_bit);
501 		addr_bit += ndie_bit;
502 	}
503 
504 	if (nsock_bit > 0) {
505 		dec->dec_ilv_sock = bitx64(dec->dec_ilv_pa,
506 		    addr_bit + nsock_bit - 1, addr_bit);
507 		addr_bit += nsock_bit;
508 	}
509 
510 	return (B_TRUE);
511 }
512 
513 /*
514  * Perform the Zen 2/Zen 3 "COD" based hashing. See the zen_umc.c interleaving
515  * section of the big theory statement for an overview of how this works.
516  */
static boolean_t
zen_umc_decode_ileave_cod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	/*
	 * The order of bits here is defined by AMD. Yes, we do use the rule's
	 * address bit first and then skip to bit 12 for the second hash bit.
	 */
	const uint32_t addr_bits[3] = { rule->ddr_addr_start, 12, 13 };

	/*
	 * COD hashing never operates together with socket or die
	 * interleaving; seeing either configured indicates bad firmware
	 * settings, not something we can decode.
	 */
	if (rule->ddr_sock_ileave_bits != 0 || rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_COD_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	/* Map the COD variant to the number of channel-select bits. */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_COD4_2CH:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_COD2_4CH:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_COD1_8CH:
		nchan_bit = 3;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;

	/*
	 * Proceed to calculate the address hash based on the number of bits
	 * that we have been told to use based on the DF rule. Use the flags in
	 * the rule to determine which additional address ranges to hash in.
	 * Each iteration computes one channel bit as the XOR of the base
	 * address bit and one bit from each enabled 64K/2M/1G range.
	 */
	for (uint_t i = 0; i < nchan_bit; i++) {
		uint8_t hash = 0;

		hash = bitx64(dec->dec_ilv_pa, addr_bits[i], addr_bits[i]);
		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 16 + i, 16 + i);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 21 + i, 21 + i);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 30 + i, 30 + i);
			hash ^= val;
		}

		/* Hash output i becomes channel bit i. */
		dec->dec_ilv_chan |= hash << i;
	}

	return (B_TRUE);
}
581 
/*
 * Common logic to perform hashing across the NPS, NPS 1K, and NPS 2K variants.
 * addr_bits[] lists, in order, the physical address bit that seeds each hash
 * output; adj[] gives the per-output offset added into each enabled optional
 * hash range (16-18, 21-23, 30-32, 40-42). nsock_bits is the number of socket
 * interleave bits (0 or 1 as used by the callers); nchan_bits is the number of
 * channel hash bits to produce. df4p0 selects the strict DF v4.0 behavior of
 * additionally XORing PA bit 14 into the first hash output when no socket
 * interleaving is present. Results land in dec_ilv_sock/dec_ilv_chan, which
 * the callers have already zeroed.
 */
static void
zen_umc_decode_ileave_nps_common(zen_umc_decoder_t *dec,
    const uint32_t *addr_bits, const uint32_t *adj, uint32_t nsock_bits,
    uint32_t nchan_bits, boolean_t df4p0)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;

	for (uint32_t i = 0; i < nchan_bits + nsock_bits; i++) {
		uint8_t hash = 0;

		hash = bitx64(dec->dec_ilv_pa, addr_bits[i], addr_bits[i]);
		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 16 + adj[i],
			    16 + adj[i]);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 21 + adj[i],
			    21 + adj[i]);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 30 + adj[i], 30 +
			    adj[i]);
			hash ^= val;
		}

		/*
		 * While 1T is only supported in the NPS 1K/2K variant, rule
		 * normalization means this won't be set in the plain NPS case.
		 */
		if ((rule->ddr_flags & DF_DRAM_F_HASH_40_42) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 40 + adj[i],
			    40 + adj[i]);
			hash ^= val;
		}

		/*
		 * If this is the first bit and we're not doing socket
		 * interleaving, then we need to add bit 14 to the running hash.
		 * This is only true for a strict DF v4.0 NPS style hash. We
		 * don't perform this for the 1K/2K variant.
		 */
		if (i == 0 && nsock_bits == 0 && df4p0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 14, 14);
			hash ^= val;
		}

		/*
		 * If socket interleaving is going on we need to store the first
		 * bit as the socket hash and then redirect the remaining bits
		 * to the channel, taking into account that the shift will be
		 * adjusted as a result.
		 */
		if (nsock_bits > 0) {
			if (i == 0) {
				dec->dec_ilv_sock = hash;
			} else {
				dec->dec_ilv_chan |= hash << (i - 1);
			}
		} else {
			dec->dec_ilv_chan |= hash << i;
		}
	}
}
652 
653 
654 /*
655  * This implements the standard NPS hash for power of 2 based channel
656  * configurations that is found in DFv4. For more information, please see the
657  * interleaving portion of the zen_umc.c big theory statement.
658  */
659 static boolean_t
zen_umc_decode_ileave_nps(const zen_umc_t * umc,zen_umc_decoder_t * dec)660 zen_umc_decode_ileave_nps(const zen_umc_t *umc, zen_umc_decoder_t *dec)
661 {
662 	uint32_t nchan_bit, nsock_bit;
663 	const df_dram_rule_t *rule = dec->dec_df_rule;
664 	/*
665 	 * The order of bits here is defined by AMD. Yes, this is start with the
666 	 * defined address bit and then skip to bit 12.
667 	 */
668 	const uint32_t addr_bits[4] = { rule->ddr_addr_start, 12, 13, 14 };
669 	const uint32_t adj[4] = { 0, 1, 2, 3 };
670 
671 	if (rule->ddr_die_ileave_bits != 0) {
672 		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
673 		dec->dec_fail_data = dec->dec_df_ruleno;
674 		return (B_FALSE);
675 	}
676 
677 	nsock_bit = rule->ddr_sock_ileave_bits;
678 	switch (rule->ddr_chan_ileave) {
679 	case DF_CHAN_ILEAVE_NPS4_2CH:
680 		nchan_bit = 1;
681 		break;
682 	case DF_CHAN_ILEAVE_NPS2_4CH:
683 		nchan_bit = 2;
684 		break;
685 	case DF_CHAN_ILEAVE_NPS1_8CH:
686 		nchan_bit = 3;
687 		break;
688 	default:
689 		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
690 		dec->dec_fail_data = rule->ddr_chan_ileave;
691 		return (B_FALSE);
692 	}
693 
694 	ASSERT3U(nchan_bit + nsock_bit, <=, 4);
695 	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
696 
697 	zen_umc_decode_ileave_nps_common(dec, addr_bits, adj, nsock_bit,
698 	    nchan_bit, B_TRUE);
699 	return (B_TRUE);
700 }
701 
702 /*
703  * This implements the Zen 5 (really DF 4D2) NPS variants that work on both 1K
704  * and 2K hashing.
705  */
706 static boolean_t
zen_umc_decode_ileave_nps_k(const zen_umc_t * umc,zen_umc_decoder_t * dec)707 zen_umc_decode_ileave_nps_k(const zen_umc_t *umc, zen_umc_decoder_t *dec)
708 {
709 	uint32_t nchan_bit, nsock_bit;
710 	const df_dram_rule_t *rule = dec->dec_df_rule;
711 	const uint32_t addr_bits_1k[5] = { rule->ddr_addr_start, 9, 12, 13,
712 	    14 };
713 	const uint32_t addr_bits_2k[4] = { rule->ddr_addr_start, 12, 13, 14 };
714 	const uint32_t adj_1k[5] = { 0, 1, 2, 3, 4 };
715 	const uint32_t adj_2k[4] = { 0, 2, 3, 4 };
716 	const uint32_t *addr_bits;
717 	const uint32_t *adj;
718 
719 	if (rule->ddr_die_ileave_bits != 0 || rule->ddr_addr_start != 8) {
720 		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
721 		dec->dec_fail_data = dec->dec_df_ruleno;
722 		return (B_FALSE);
723 	}
724 
725 	nsock_bit = rule->ddr_sock_ileave_bits;
726 	switch (rule->ddr_chan_ileave) {
727 	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
728 	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
729 		nchan_bit = 1;
730 		break;
731 	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
732 	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
733 		nchan_bit = 2;
734 		break;
735 	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
736 	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
737 		nchan_bit = 3;
738 		break;
739 	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
740 	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
741 		nchan_bit = 4;
742 		break;
743 	default:
744 		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
745 		dec->dec_fail_data = rule->ddr_chan_ileave;
746 		return (B_FALSE);
747 	}
748 
749 	switch (rule->ddr_chan_ileave) {
750 	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
751 	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
752 	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
753 	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
754 		ASSERT3U(nchan_bit + nsock_bit, <=, 5);
755 		addr_bits = addr_bits_1k;
756 		adj = adj_1k;
757 		break;
758 	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
759 	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
760 	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
761 	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
762 		ASSERT3U(nchan_bit + nsock_bit, <=, 4);
763 		addr_bits = addr_bits_2k;
764 		adj = adj_2k;
765 		break;
766 	default:
767 		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
768 		dec->dec_fail_data = rule->ddr_chan_ileave;
769 		return (B_FALSE);
770 	}
771 
772 	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
773 	zen_umc_decode_ileave_nps_common(dec, addr_bits, adj, nsock_bit,
774 	    nchan_bit, B_FALSE);
775 	return (B_TRUE);
776 }
777 
778 /*
779  * This implements the logic to perform the Zen 3 6ch special hash. It's worth
780  * calling out that unlike all other hash functions, this does not support the
781  * use of the DF_DRAM_F_HASH_16_18 flag.
782  */
static void
zen_umc_decode_hash_zen3_6ch(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[3])
{
	uint32_t addr_bit = rule->ddr_addr_start;
	/*
	 * Yes, we use these in a weird order. No, there is no 64K.
	 */
	const uint32_t bits_2M[3] = { 23, 21, 22 };
	const uint32_t bits_1G[3] = { 32, 30, 31 };

	hashes[0] = hashes[1] = hashes[2] = 0;
	for (uint_t i = 0; i < ZEN_UMC_COD_NBITS; i++) {
		/* Seed each output from consecutive bits of the start. */
		hashes[i] = bitx64(pa, addr_bit + i, addr_bit + i);
		/*
		 * Only the first hash output additionally folds in the bit
		 * three above the start.
		 */
		if (i == 0) {
			uint8_t val = bitx64(pa, addr_bit + 3, addr_bit + 3);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(pa, bits_2M[i], bits_2M[i]);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(pa, bits_1G[i], bits_1G[i]);
			hashes[i] ^= val;
		}
	}
}
813 
814 /*
815  * Perform Zen 3 6-channel hashing. This is pretty weird compared to others. See
816  * the zen_umc.c big theory statement for the thorny details.
817  */
static boolean_t
zen_umc_decode_ileave_zen3_6ch(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t hashes[3] = { 0 };
	const df_dram_rule_t *rule = dec->dec_df_rule;
	uint32_t addr_bit = rule->ddr_addr_start;

	/*
	 * The Zen 3 6-channel hash never combines with socket or die
	 * interleaving.
	 */
	if (rule->ddr_sock_ileave_bits != 0 || rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_COD_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	zen_umc_decode_hash_zen3_6ch(rule, dec->dec_ilv_pa, hashes);
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	/* The low channel bit always comes straight from hash 0. */
	dec->dec_ilv_chan = hashes[0];
	/*
	 * When both upper hash bits are set, the upper channel bits instead
	 * come from the address above the interleave bits taken mod 3;
	 * otherwise hashes 1 and 2 supply channel bits 1 and 2 directly.
	 */
	if (hashes[1] == 1 && hashes[2] == 1) {
		uint64_t mod_addr = dec->dec_ilv_pa >> (addr_bit + 3);
		dec->dec_ilv_chan |= (mod_addr % 3) << 1;
	} else {
		dec->dec_ilv_chan |= hashes[1] << 1;
		dec->dec_ilv_chan |= hashes[2] << 2;
	}

	return (B_TRUE);
}
844 
845 /*
846  * This is the standard hash function for the non-power of two based NPS hashes.
847  * See the big theory statement for more information. Unlike the normal NPS hash
848  * which uses bit 14 conditionally based on socket interleaving, here it is
849  * always used.
850  */
static void
zen_umc_decode_hash_nps_mod(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[3])
{
	/* Hash seeds: the rule's start bit, then bits 12 and 13. */
	const uint32_t addr_bits[3] = { rule->ddr_addr_start, 12, 13 };

	for (uint_t i = 0; i < ZEN_UMC_NPS_MOD_NBITS; i++) {
		hashes[i] = bitx64(pa, addr_bits[i], addr_bits[i]);
		/* Bit 14 is unconditionally folded into the first hash. */
		if (i == 0) {
			uint8_t val = bitx64(pa, 14, 14);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(pa, 16 + i, 16 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(pa, 21 + i, 21 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(pa, 30 + i, 30 + i);
			hashes[i] ^= val;
		}
	}
}
880 
/*
 * The NPS 1K/2K variant of the non-power of 2 hash. Unlike
 * zen_umc_decode_hash_nps_mod(), this produces four hash outputs (seeded from
 * the rule's start bit and bits 9, 12, and 13) and additionally supports the
 * 1T (40-42) hash flag. As above, PA bit 14 is always XORed into the first
 * hash output.
 */
static void
zen_umc_decode_hash_nps_k_mod(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[4])
{
	const uint32_t addr_bits[4] = { rule->ddr_addr_start, 9, 12, 13 };

	for (size_t i = 0; i < ARRAY_SIZE(addr_bits); i++) {
		hashes[i] = bitx64(pa, addr_bits[i], addr_bits[i]);
		/* Bit 14 is unconditionally folded into the first hash. */
		if (i == 0) {
			uint8_t val = bitx64(pa, 14, 14);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(pa, 16 + i, 16 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(pa, 21 + i, 21 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(pa, 30 + i, 30 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_40_42) != 0) {
			uint8_t val = bitx64(pa, 40 + i, 40 + i);
			hashes[i] ^= val;
		}
	}
}
915 
916 /*
917  * See the big theory statement in zen_umc.c which describes the rules for this
918  * computation. This is a little less weird than the Zen 3 one, but still,
919  * unique.
920  */
static boolean_t
zen_umc_decode_ileave_nps_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t hashes[3] = { 0 };
	uint32_t nsock_bit, chan_mod;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	/*
	 * Die interleaving is never valid for the NPS modulus modes.
	 */
	if (rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	/* The modulus is 3 or 5 depending on the channel count family. */
	nsock_bit = rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
		chan_mod = 3;
		break;
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		chan_mod = 5;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	zen_umc_decode_hash_nps_mod(rule, dec->dec_ilv_pa, hashes);

	/* With socket interleaving, hash 0 selects the socket. */
	if (nsock_bit > 0) {
		ASSERT3U(nsock_bit, ==, 1);
		dec->dec_ilv_sock = hashes[0];
	}

	/*
	 * The base channel comes from the address above bit 13 taken mod 3 or
	 * 5, with hash 0 acting as a +1 rotation within the modulus.
	 */
	dec->dec_ilv_chan = bitx64(dec->dec_ilv_pa, 63, 14) % chan_mod;
	if (hashes[0] == 1) {
		dec->dec_ilv_chan = (dec->dec_ilv_chan + 1) % chan_mod;
	}

	/*
	 * Use the remaining hash bits based on the number of channels. There is
	 * nothing else to do for 3/5 channel configs.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
		break;
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		dec->dec_ilv_chan += hashes[2] * chan_mod;
		break;
	case DF_CHAN_ILEAVE_NPS1_12CH:
		dec->dec_ilv_chan += ((hashes[2] << 1) | hashes[1]) * chan_mod;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	return (B_TRUE);
}
987 
988 /*
989  * Determine the interleave address for the NPS 1K/2K non-power of 2 based
990  * values. Each of these uses a similar style of calculation with rather
991  * different values and as such we use a data table for each of these that maps
992  * to a given rule.
993  */
static boolean_t
zen_umc_decode_ileave_nps_k_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t hashes[4] = { 0 };
	uint32_t chan, mod_val;
	uint64_t mod_addr;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_np2_k_rule_t *np2 = NULL;

	/*
	 * Find the data table entry that describes this particular
	 * non-power-of-2 1K/2K interleave variant.
	 */
	for (size_t i = 0; i < ARRAY_SIZE(zen_umc_np2_k_rules); i++) {
		if (rule->ddr_chan_ileave == zen_umc_np2_k_rules[i].zukr_type) {
			np2 = &zen_umc_np2_k_rules[i];
			break;
		}
	}

	if (np2 == NULL) {
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * These variants never use die interleaving and always interleave
	 * starting at address bit 8.
	 */
	if (rule->ddr_die_ileave_bits != 0 || rule->ddr_addr_start != 8) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	/*
	 * These rules either require that socket interleaving is enabled or
	 * not. Make sure that this matches before we proceed.
	 */
	if (np2->zukr_sock != (rule->ddr_sock_ileave_bits == 1)) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	zen_umc_decode_hash_nps_k_mod(rule, dec->dec_ilv_pa, hashes);
	if (rule->ddr_sock_ileave_bits > 0) {
		ASSERT3U(rule->ddr_sock_ileave_bits, ==, 1);
		dec->dec_ilv_sock = hashes[0];
	}

	/*
	 * Build the address that feeds the modulus: the high portion of the
	 * PA shifted up by the rule's shift amount, with selected low PA bits
	 * filled into the vacated positions (a zero entry in zukr_mod_fill
	 * means that position stays zero).
	 */
	mod_addr = bitx64(dec->dec_ilv_pa, 63, np2->zukr_high);
	mod_addr = mod_addr << np2->zukr_mod_shift;
	for (uint32_t i = 0; i < np2->zukr_mod_shift; i++) {
		uint32_t bit = np2->zukr_mod_fill[i];
		if (bit != 0) {
			uint64_t val = bitx64(dec->dec_ilv_pa, bit, bit);
			mod_addr = bitset64(mod_addr, i, i, val);
		}
	}

	/*
	 * The channel is the modulus result shifted up, with the rule's
	 * chosen hash outputs filling the low bits.
	 */
	mod_val = (uint32_t)(mod_addr % np2->zukr_mod);
	chan = mod_val << np2->zukr_chan_mod_shift;
	for (uint32_t i = 0; i < np2->zukr_chan_mod_shift; i++) {
		VERIFY3U(np2->zukr_chan_fill[i], <, ARRAY_SIZE(hashes));
		uint32_t bit = np2->zukr_chan_fill[i];
		uint32_t val = hashes[np2->zukr_chan_fill[i]];
		chan = bitset32(chan, bit, bit, val);
	}

	dec->dec_ilv_chan = chan;
	return (B_TRUE);
}
1061 
1062 /*
1063  * Our next task is to attempt to translate the PA and the DF rule from a system
1064  * address into a normalized address and a particular DRAM channel that it's
1065  * targeting. There are several things that we need to take into account here
1066  * when performing interleaving and translation:
1067  *
1068  *  o The DRAM Hole modifying our base address
1069  *  o The various interleave bits
1070  *  o Potentially hashing based on channel and global settings
1071  *  o Potential CS re-targeting registers (only on some systems)
1072  *  o Finally, the question of how to adjust for the DRAM hole and the base
1073  *    address changes based on the DF generation and channel configuration. This
1074  *    influences what address we start interleaving with.
1075  *
1076  * Note, this phase does not actually construct the normalized (e.g. channel)
1077  * address. That's done in a subsequent step. For more background, please see
1078  * the 'Data Fabric Interleaving' section of the zen_umc.c big theory statement.
1079  */
static boolean_t
zen_umc_decode_sysaddr_to_csid(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t sock, die, chan, remap_ruleset;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_cs_remap_t *remap;

	/*
	 * First, we must determine what the actual address used for
	 * interleaving is. This varies based on the interleaving and DF
	 * generation.
	 */
	if (!zen_umc_determine_ileave_addr(umc, dec)) {
		return (B_FALSE);
	}

	/*
	 * Dispatch to the interleave decoder that corresponds to this rule's
	 * channel interleaving style. Each decoder fills in dec_ilv_sock,
	 * dec_ilv_die, and dec_ilv_chan on success.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
	case DF_CHAN_ILEAVE_2CH:
	case DF_CHAN_ILEAVE_4CH:
	case DF_CHAN_ILEAVE_8CH:
	case DF_CHAN_ILEAVE_16CH:
	case DF_CHAN_ILEAVE_32CH:
		if (!zen_umc_decode_ileave_nohash(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_COD4_2CH:
	case DF_CHAN_ILEAVE_COD2_4CH:
	case DF_CHAN_ILEAVE_COD1_8CH:
		if (!zen_umc_decode_ileave_cod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_2CH:
	case DF_CHAN_ILEAVE_NPS2_4CH:
	case DF_CHAN_ILEAVE_NPS1_8CH:
		if (!zen_umc_decode_ileave_nps(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_6CH:
		if (!zen_umc_decode_ileave_zen3_6ch(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		if (!zen_umc_decode_ileave_nps_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
		if (!zen_umc_decode_ileave_nps_k(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH_1K:
	case DF_CHAN_ILEAVE_NPS2_6CH_1K:
	case DF_CHAN_ILEAVE_NPS1_12CH_1K:
	case DF_CHAN_ILEAVE_NPS0_24CH_1K:
	case DF_CHAN_ILEAVE_NPS2_5CH_1K:
	case DF_CHAN_ILEAVE_NPS1_10CH_1K:
	case DF_CHAN_ILEAVE_NPS4_3CH_2K:
	case DF_CHAN_ILEAVE_NPS2_6CH_2K:
	case DF_CHAN_ILEAVE_NPS1_12CH_2K:
	case DF_CHAN_ILEAVE_NPS0_24CH_2K:
	case DF_CHAN_ILEAVE_NPS2_5CH_2K:
	case DF_CHAN_ILEAVE_NPS1_10CH_2K:
		if (!zen_umc_decode_ileave_nps_k_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_MI3H_8CH:
	case DF_CHAN_ILEAVE_MI3H_16CH:
	case DF_CHAN_ILEAVE_MI3H_32CH:
	default:
		/*
		 * The MI3H interleave styles are not currently supported, so
		 * they fall into the same failure path as unknown values.
		 */
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * At this point we have dealt with decoding the interleave into the
	 * logical elements that it contains. We need to transform that back
	 * into a fabric ID, so we can add it to the base fabric ID in our rule.
	 * After that, we need to see if there is any CS remapping going on. If
	 * there is, we will replace the component part of the decomposed fabric
	 * ID. With that done, we can then transform the components back into
	 * our target fabric ID, which indicates which UMC we're after.
	 */
	zen_fabric_id_compose(&umc->umc_decomp, dec->dec_ilv_sock,
	    dec->dec_ilv_die, dec->dec_ilv_chan, &dec->dec_ilv_fabid);
	dec->dec_log_fabid = dec->dec_ilv_fabid + rule->ddr_dest_fabid;

	/*
	 * If there's no remapping to do, then we're done. Simply assign the
	 * logical ID as our target.
	 */
	zen_fabric_id_decompose(&umc->umc_decomp, dec->dec_log_fabid, &sock,
	    &die, &chan);
	if ((rule->ddr_flags & DF_DRAM_F_REMAP_EN) == 0) {
		dec->dec_targ_fabid = dec->dec_log_fabid;
		return (B_TRUE);
	}

	/*
	 * The DF contains multiple remapping tables. We must figure out which
	 * of these to actually use. There are two different ways that this can
	 * work. The first way is the one added in DFv4 and is used since then.
	 * In that case, the DRAM rule includes both that remapping was enabled
	 * and which of the multiple mapping tables to use.
	 *
	 * This feature also exists prior to DFv4, but only in Milan. In that
	 * world, indicated by the DF_DRAM_F_REMAP_SOCK flag, there is one table
	 * in each DF per-socket. Based on the destination socket from the data
	 * fabric ID, you pick the actual table to use.
	 *
	 * Once the table has been selected, we maintain the socket and die
	 * portions of the fabric ID as constants and replace the component with
	 * the one the remapping table indicates.
	 *
	 * Technically each DF has its own copy of the remapping tables. To make
	 * this work we rely on the following assumption: a given DF node has to
	 * be able to fully route all DRAM rules to a target. That is, a given
	 * DF node doesn't really forward a system address to the remote die for
	 * further interleave processing and therefore we must have enough
	 * information here to map it totally from the same DF that we got the
	 * CCM rules from in the first place, DF 0.
	 */
	if ((rule->ddr_flags & DF_DRAM_F_REMAP_SOCK) != 0) {
		remap_ruleset = sock;
	} else {
		remap_ruleset = rule->ddr_remap_ent;
	}

	if (remap_ruleset >= dec->dec_df_rulesrc->zud_cs_nremap) {
		dec->dec_fail = ZEN_UMC_DECODE_F_BAD_REMAP_SET;
		dec->dec_fail_data = remap_ruleset;
		return (B_FALSE);
	}

	remap = &dec->dec_df_rulesrc->zud_remap[remap_ruleset];
	if (chan >= remap->csr_nremaps) {
		dec->dec_fail = ZEN_UMC_DECODE_F_BAD_REMAP_ENTRY;
		dec->dec_fail_data = chan;
		return (B_FALSE);
	}

	dec->dec_remap_comp = remap->csr_remaps[chan];
	if ((dec->dec_remap_comp & ~umc->umc_decomp.dfd_comp_mask) != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_REMAP_HAS_BAD_COMP;
		dec->dec_fail_data = dec->dec_remap_comp;
		return (B_FALSE);
	}

	zen_fabric_id_compose(&umc->umc_decomp, sock, die, dec->dec_remap_comp,
	    &dec->dec_targ_fabid);

	return (B_TRUE);
}
1251 
1252 /*
1253  * Our next step here is to actually take our target ID and find the
1254  * corresponding DF, UMC, and actual rule that was used. Note, we don't
1255  * decompose the ID and look things up that way for a few reasons. While each
1256  * UMC should map linearly to its instance/component ID, there are suggestions
1257  * that they can be renumbered. This makes it simplest to just walk over
1258  * everything (and there aren't that many things to walk over either).
1259  */
1260 static boolean_t
zen_umc_decode_find_umc_rule(const zen_umc_t * umc,zen_umc_decoder_t * dec)1261 zen_umc_decode_find_umc_rule(const zen_umc_t *umc, zen_umc_decoder_t *dec)
1262 {
1263 	for (uint_t dfno = 0; dfno < umc->umc_ndfs; dfno++) {
1264 		const zen_umc_df_t *df = &umc->umc_dfs[dfno];
1265 		for (uint_t umcno = 0; umcno < df->zud_nchan; umcno++) {
1266 			const zen_umc_chan_t *chan = &df->zud_chan[umcno];
1267 
1268 			if (chan->chan_fabid != dec->dec_targ_fabid) {
1269 				continue;
1270 			}
1271 
1272 			/*
1273 			 * At this point we have found the UMC that we were
1274 			 * looking for. Snapshot that and then figure out which
1275 			 * rule index of it corresponds to our mapping so we can
1276 			 * properly determine an offset. We will still use the
1277 			 * primary CCM rule for all other calculations.
1278 			 */
1279 			dec->dec_umc_chan = chan;
1280 			for (uint32_t ruleno = 0; ruleno < chan->chan_nrules;
1281 			    ruleno++) {
1282 				const df_dram_rule_t *rule =
1283 				    &chan->chan_rules[ruleno];
1284 				if ((rule->ddr_flags & DF_DRAM_F_VALID) == 0) {
1285 					continue;
1286 				}
1287 
1288 				if (dec->dec_pa >= rule->ddr_base &&
1289 				    dec->dec_pa < rule->ddr_limit) {
1290 					dec->dec_umc_ruleno = ruleno;
1291 					return (B_TRUE);
1292 				}
1293 			}
1294 
1295 			dec->dec_fail = ZEN_UMC_DECODE_F_UMC_DOESNT_HAVE_PA;
1296 			return (B_FALSE);
1297 		}
1298 	}
1299 
1300 	dec->dec_fail = ZEN_UMC_DECODE_F_CANNOT_MAP_FABID;
1301 	return (B_FALSE);
1302 }
1303 
1304 /*
1305  * Non-hashing interleave modes system address normalization logic. See the
1306  * zen_umc.c big theory statement for more information.
1307  */
1308 static boolean_t
zen_umc_decode_normalize_nohash(const zen_umc_t * umc,zen_umc_decoder_t * dec)1309 zen_umc_decode_normalize_nohash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
1310 {
1311 	uint_t nbits = 0;
1312 	const df_dram_rule_t *rule = dec->dec_df_rule;
1313 
1314 	nbits += rule->ddr_sock_ileave_bits;
1315 	nbits += rule->ddr_die_ileave_bits;
1316 	switch (rule->ddr_chan_ileave) {
1317 	case DF_CHAN_ILEAVE_1CH:
1318 		break;
1319 	case DF_CHAN_ILEAVE_2CH:
1320 		nbits += 1;
1321 		break;
1322 	case DF_CHAN_ILEAVE_4CH:
1323 		nbits += 2;
1324 		break;
1325 	case DF_CHAN_ILEAVE_8CH:
1326 		nbits += 3;
1327 		break;
1328 	case DF_CHAN_ILEAVE_16CH:
1329 		nbits += 4;
1330 		break;
1331 	case DF_CHAN_ILEAVE_32CH:
1332 		nbits += 5;
1333 		break;
1334 	default:
1335 		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
1336 		dec->dec_fail_data = rule->ddr_chan_ileave;
1337 		return (B_FALSE);
1338 	}
1339 
1340 	/*
1341 	 * If we have a really simple configuration (e.g. no interleaving at
1342 	 * all), then make sure that we do not actually do anything here.
1343 	 */
1344 	if (nbits > 0) {
1345 		dec->dec_norm_addr = bitdel64(dec->dec_norm_addr,
1346 		    rule->ddr_addr_start + nbits - 1, rule->ddr_addr_start);
1347 	}
1348 
1349 	return (B_TRUE);
1350 }
1351 
1352 /*
1353  * COD/NPS system address normalization logic. See the zen_umc.c big theory
1354  * statement for more information.
1355  */
1356 static boolean_t
zen_umc_decode_normalize_hash(const zen_umc_t * umc,zen_umc_decoder_t * dec)1357 zen_umc_decode_normalize_hash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
1358 {
1359 	uint_t nbits = 0, nstart = 0;
1360 	const df_dram_rule_t *rule = dec->dec_df_rule;
1361 
1362 	/*
1363 	 * NPS 1K hashes remove bits 8 and 9 first. Determine how many bits to
1364 	 * remove from the starting location. This will later be reduced based
1365 	 * upon how many address bits there actually are.
1366 	 */
1367 	switch (rule->ddr_chan_ileave) {
1368 	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
1369 	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
1370 	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
1371 	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
1372 		nstart = 2;
1373 		break;
1374 	default:
1375 		nstart = 1;
1376 		break;
1377 	}
1378 
1379 	/*
1380 	 * NPS hashes allow for socket interleaving, COD hashes do not. Add
1381 	 * socket interleaving, skip die.
1382 	 */
1383 	nbits += rule->ddr_sock_ileave_bits;
1384 	switch (rule->ddr_chan_ileave) {
1385 	case DF_CHAN_ILEAVE_COD4_2CH:
1386 	case DF_CHAN_ILEAVE_NPS4_2CH:
1387 	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
1388 	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
1389 		nbits += 1;
1390 		break;
1391 	case DF_CHAN_ILEAVE_COD2_4CH:
1392 	case DF_CHAN_ILEAVE_NPS2_4CH:
1393 	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
1394 	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
1395 		nbits += 2;
1396 		break;
1397 	case DF_CHAN_ILEAVE_COD1_8CH:
1398 	case DF_CHAN_ILEAVE_NPS1_8CH:
1399 	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
1400 	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
1401 		nbits += 3;
1402 		break;
1403 	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
1404 	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
1405 		nbits += 4;
1406 		break;
1407 	default:
1408 		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
1409 		dec->dec_fail_data = rule->ddr_chan_ileave;
1410 	}
1411 
1412 	/*
1413 	 * Don't remove more bits from the start than exist.
1414 	 */
1415 	if (nstart > nbits) {
1416 		nstart = nbits;
1417 	}
1418 
1419 	/*
1420 	 * Always remove high order bits before low order bits so we don't have
1421 	 * to adjust the bits we need to remove.
1422 	 */
1423 	if (nbits > nstart) {
1424 		uint_t start = 12;
1425 		uint_t end = start + (nbits - nstart - 1);
1426 		dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, end, start);
1427 	}
1428 
1429 	dec->dec_norm_addr = bitdel64(dec->dec_norm_addr,
1430 	    rule->ddr_addr_start + nstart - 1, rule->ddr_addr_start);
1431 	return (B_TRUE);
1432 }
1433 
1434 /*
1435  * Now it's time to perform normalization of our favorite interleaving type.
1436  * Please see the comments in zen_umc.c on this to understand what we're doing
1437  * here and why.
1438  */
1439 static boolean_t
zen_umc_decode_normalize_zen3_6ch(const zen_umc_t * umc,zen_umc_decoder_t * dec)1440 zen_umc_decode_normalize_zen3_6ch(const zen_umc_t *umc, zen_umc_decoder_t *dec)
1441 {
1442 	uint8_t hashes[3] = { 0 };
1443 	uint_t start, end;
1444 	const df_dram_rule_t *rule = dec->dec_df_rule;
1445 
1446 	/*
1447 	 * As per the theory statement, we always remove the hash bits here from
1448 	 * the starting address. Because this is a 6-channel config, that turns
1449 	 * into 3. Perform the hash again first.
1450 	 */
1451 	zen_umc_decode_hash_zen3_6ch(rule, dec->dec_norm_addr, hashes);
1452 	start = rule->ddr_addr_start;
1453 	end = rule->ddr_addr_start + ZEN_UMC_COD_NBITS - 1;
1454 	dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, end, start);
1455 
1456 	/*
1457 	 * This is the case the theory statement warned about. This gets
1458 	 * normalized to the top of the DIMM's range (its two upper most bits
1459 	 * are set).
1460 	 */
1461 	if (hashes[1] == 1 && hashes[2] == 1) {
1462 		uint_t start = 14 - ZEN_UMC_COD_NBITS +
1463 		    dec->dec_umc_chan->chan_np2_space0;
1464 		dec->dec_norm_addr = bitset64(dec->dec_norm_addr, start + 1,
1465 		    start, 0x3);
1466 	}
1467 
1468 	return (B_TRUE);
1469 }
1470 
1471 /*
1472  * Based on the algorithm of sorts described in zen_umc.c, we have a few
1473  * different phases of extraction and combination. This isn't quite like the
1474  * others where we simply delete bits.
1475  */
static boolean_t
zen_umc_decode_normalize_nps_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint64_t low, high, mid;
	uint_t nbits, chan_mod, sock_bits, nmid_bits;
	uint_t mid_start, mid_end;
	uint8_t hashes[3] = { 0 };
	const df_dram_rule_t *rule = dec->dec_df_rule;

	/*
	 * Determine the channel modulus (3 or 5) and the number of interleave
	 * bits (1-3) from the rule's interleave type.
	 */
	sock_bits = rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_3CH:
		chan_mod = 3;
		nbits = 1;
		break;
	case DF_CHAN_ILEAVE_NPS2_5CH:
		chan_mod = 5;
		nbits = 1;
		break;
	case DF_CHAN_ILEAVE_NPS2_6CH:
		chan_mod = 3;
		nbits = 2;
		break;
	case DF_CHAN_ILEAVE_NPS1_10CH:
		chan_mod = 5;
		nbits = 2;
		break;
	case DF_CHAN_ILEAVE_NPS1_12CH:
		chan_mod = 3;
		nbits = 3;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * First extract the low bit range that we're using which is everything
	 * below the starting interleave address. We also always extract the
	 * high bits, which are always [63:14] and divide it by the modulus.
	 * Note, we apply the hash after any such division if needed. It becomes
	 * the new least significant bit.
	 */
	low = bitx64(dec->dec_norm_addr, rule->ddr_addr_start - 1, 0);
	high = bitx64(dec->dec_norm_addr, 63, 14) / chan_mod;
	zen_umc_decode_hash_nps_mod(rule, dec->dec_norm_addr, hashes);
	if (sock_bits == 0) {
		high = (high << 1) | hashes[0];
	}

	/*
	 * Now for the weirdest bit here, extracting the middle bits. Recall
	 * this hash uses bit 8, then 13, then 12 (the hash order is still 8,
	 * 12, 13, but it uses the hashes[2] before hashes[1] in
	 * zen_umc_decode_ileave_nps_mod()). So if we're only using 1 interleave
	 * bit, we just remove bit 8 (assuming that is our starting address) and
	 * our range is [13:9]. If we're using two, our range becomes [12:9],
	 * and if three, [11:9]. The 6 - nbits below comes from the fact that in
	 * a 1 bit interleave we have 5 bits. Because our mid_start/mid_end
	 * range is inclusive, we subtract one at the end from mid_end.
	 */
	nmid_bits = 6 - nbits;
	mid_start = rule->ddr_addr_start + 1;
	mid_end = mid_start + nmid_bits - 1;
	mid = bitx64(dec->dec_norm_addr, mid_end, mid_start);

	/*
	 * Because we've been removing bits, we don't use any of the start and
	 * ending ranges we calculated above for shifts, as that was what we
	 * needed from the original address. Reassemble as low | mid | high,
	 * with each piece shifted to sit immediately above the previous one.
	 */
	dec->dec_norm_addr = low | (mid << rule->ddr_addr_start) | (high <<
	    (rule->ddr_addr_start + nmid_bits));

	return (B_TRUE);
}
1553 
1554 /*
1555  * Construct the normalized address for the NPS 1K/2K non-power of 2 instances.
1556  * See the theory statement for the rough formula used here. While each variant
1557  * uses slightly different values, that has been abstracted based on our data
1558  * table.
1559  */
static boolean_t
zen_umc_decode_normalize_nps_k_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint64_t high, mid, low;
	uint_t mid_end;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_np2_k_rule_t *np2 = NULL;

	/*
	 * Find the data table entry for this non-power-of-2 1K/2K variant.
	 */
	for (size_t i = 0; i < ARRAY_SIZE(zen_umc_np2_k_rules); i++) {
		if (rule->ddr_chan_ileave == zen_umc_np2_k_rules[i].zukr_type) {
			np2 = &zen_umc_np2_k_rules[i];
			break;
		}
	}

	if (np2 == NULL) {
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * Carve the normalized address into three pieces: the untouched low
	 * bits below the interleave start, the table-defined middle range,
	 * and the high portion which gets divided by the rule's modulus.
	 */
	low = bitx64(dec->dec_norm_addr, rule->ddr_addr_start - 1, 0);
	mid_end = np2->zukr_norm_addr + np2->zukr_norm_naddr - 1;
	VERIFY3U(mid_end, >=, rule->ddr_addr_start);
	mid = bitx64(dec->dec_norm_addr, mid_end, np2->zukr_norm_addr);

	high = bitx64(dec->dec_norm_addr, 63, np2->zukr_high);
	if (np2->zukr_div_naddr > 0) {
		/*
		 * Some variants fold additional address bits into the low
		 * end of the high portion before the division.
		 */
		uint_t ins_end = np2->zukr_div_addr + np2->zukr_div_naddr - 1;
		uint64_t insert = bitx64(dec->dec_norm_addr, ins_end,
		    np2->zukr_div_addr);

		high = high << np2->zukr_div_naddr;
		high = bitset64(high, np2->zukr_div_naddr - 1, 0, insert);
	}
	high = high / np2->zukr_mod;

	/*
	 * Reassemble the pieces: low stays in place, mid sits at the
	 * interleave start, and the divided high portion above it.
	 */
	dec->dec_norm_addr = low | (mid << rule->ddr_addr_start) | (high <<
	    (rule->ddr_addr_start + np2->zukr_norm_naddr));
	return (B_TRUE);
}
1601 
1602 /*
1603  * Now we need to go through and try to construct a normalized address using all
1604  * the information that we've gathered to date. To do this we need to take into
1605  * account all of the following transformations on the address that need to
1606  * occur. We apply modifications to the address in the following order:
1607  *
1608  *   o The base address of the rule
1609  *   o DRAM hole changes
1610  *   o Normalization of the address due to interleaving (more fun)
1611  *   o The DRAM offset register of the rule
1612  */
static boolean_t
zen_umc_decode_sysaddr_to_norm(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const zen_umc_chan_t *chan = dec->dec_umc_chan;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	/*
	 * Start from the raw PA and apply the base address and DRAM hole
	 * adjustments.
	 */
	dec->dec_norm_addr = dec->dec_pa;
	if (!zen_umc_adjust_dram_addr(umc, dec, &dec->dec_norm_addr,
	    ZEN_UMC_DECODE_F_CALC_NORM_UNDERFLOW)) {
		return (B_FALSE);
	}

	/*
	 * Now for the most annoying part of this whole thing, normalizing based
	 * on our actual interleave format. The reason for this is that when
	 * interleaving is going on, it actually is removing bits that are just
	 * being used to direct it somewhere; however, it's actually generally
	 * speaking the same value in each location. See the big theory
	 * statement in zen_umc.c for more information.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
	case DF_CHAN_ILEAVE_2CH:
	case DF_CHAN_ILEAVE_4CH:
	case DF_CHAN_ILEAVE_8CH:
	case DF_CHAN_ILEAVE_16CH:
	case DF_CHAN_ILEAVE_32CH:
		if (!zen_umc_decode_normalize_nohash(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_COD4_2CH:
	case DF_CHAN_ILEAVE_COD2_4CH:
	case DF_CHAN_ILEAVE_COD1_8CH:
	case DF_CHAN_ILEAVE_NPS4_2CH:
	case DF_CHAN_ILEAVE_NPS2_4CH:
	case DF_CHAN_ILEAVE_NPS1_8CH:
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
		if (!zen_umc_decode_normalize_hash(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_6CH:
		if (!zen_umc_decode_normalize_zen3_6ch(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		if (!zen_umc_decode_normalize_nps_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH_1K:
	case DF_CHAN_ILEAVE_NPS2_6CH_1K:
	case DF_CHAN_ILEAVE_NPS1_12CH_1K:
	case DF_CHAN_ILEAVE_NPS0_24CH_1K:
	case DF_CHAN_ILEAVE_NPS2_5CH_1K:
	case DF_CHAN_ILEAVE_NPS1_10CH_1K:
	case DF_CHAN_ILEAVE_NPS4_3CH_2K:
	case DF_CHAN_ILEAVE_NPS2_6CH_2K:
	case DF_CHAN_ILEAVE_NPS1_12CH_2K:
	case DF_CHAN_ILEAVE_NPS0_24CH_2K:
	case DF_CHAN_ILEAVE_NPS2_5CH_2K:
	case DF_CHAN_ILEAVE_NPS1_10CH_2K:
		if (!zen_umc_decode_normalize_nps_k_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_MI3H_8CH:
	case DF_CHAN_ILEAVE_MI3H_16CH:
	case DF_CHAN_ILEAVE_MI3H_32CH:
	default:
		/*
		 * The MI3H interleaves are not currently supported here and
		 * share the generic unsupported failure path.
		 */
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * Determine if this rule has an offset to apply. Note, there is never
	 * an offset for rule 0, hence the index into this is one less than the
	 * actual rule number. Unlike other transformations these offsets
	 * describe the start of a normalized range. Therefore we need to
	 * actually add this value instead of subtract.
	 */
	if (dec->dec_umc_ruleno > 0) {
		uint32_t offno = dec->dec_umc_ruleno - 1;
		const chan_offset_t *offset = &chan->chan_offsets[offno];

		if (offset->cho_valid) {
			dec->dec_norm_addr += offset->cho_offset;
		}
	}

	return (B_TRUE);
}
1719 
1720 /*
1721  * This applies the formula that determines a chip-select actually matches which
1722  * is defined as (address & ~mask) == (base & ~mask) in the PPR. There is both a
1723  * primary and secondary mask here. We need to pay attention to which is used
1724  * (if any) for later on.
1725  */
1726 static boolean_t
zen_umc_decoder_cs_matches(const umc_cs_t * cs,const uint64_t norm,boolean_t * matched_sec)1727 zen_umc_decoder_cs_matches(const umc_cs_t *cs, const uint64_t norm,
1728     boolean_t *matched_sec)
1729 {
1730 	if (cs->ucs_base.udb_valid != 0) {
1731 		uint64_t imask = ~cs->ucs_base_mask;
1732 		if ((norm & imask) == (cs->ucs_base.udb_base & imask)) {
1733 			*matched_sec = B_FALSE;
1734 			return (B_TRUE);
1735 		}
1736 	}
1737 
1738 	if (cs->ucs_sec.udb_valid != 0) {
1739 		uint64_t imask = ~cs->ucs_sec_mask;
1740 		if ((norm & imask) == (cs->ucs_sec.udb_base & imask)) {
1741 			*matched_sec = B_TRUE;
1742 			return (B_TRUE);
1743 		}
1744 	}
1745 
1746 	return (B_FALSE);
1747 }
1748 
1749 /*
1750  * Go through with our normalized address and map it to a given chip-select.
1751  * This as a side effect indicates which DIMM we're going out on as well. Note,
1752  * the final DIMM can change due to chip-select hashing; however, we use this
1753  * DIMM for determining all of the actual address translations.
1754  */
1755 static boolean_t
zen_umc_decode_find_cs(const zen_umc_t * umc,zen_umc_decoder_t * dec)1756 zen_umc_decode_find_cs(const zen_umc_t *umc, zen_umc_decoder_t *dec)
1757 {
1758 	const zen_umc_chan_t *chan = dec->dec_umc_chan;
1759 
1760 	for (uint_t dimmno = 0; dimmno < ZEN_UMC_MAX_DIMMS; dimmno++) {
1761 		const umc_dimm_t *dimm = &chan->chan_dimms[dimmno];
1762 
1763 		if ((dimm->ud_flags & UMC_DIMM_F_VALID) == 0)
1764 			continue;
1765 
1766 		for (uint_t csno = 0; csno < ZEN_UMC_MAX_CS_PER_DIMM; csno++) {
1767 			const umc_cs_t *cs = &dimm->ud_cs[csno];
1768 			boolean_t is_sec = B_FALSE;
1769 
1770 			if (zen_umc_decoder_cs_matches(cs, dec->dec_norm_addr,
1771 			    &is_sec)) {
1772 				dec->dec_dimm = dimm;
1773 				dec->dec_cs = cs;
1774 				dec->dec_log_csno = dimmno * ZEN_UMC_MAX_DIMMS +
1775 				    csno;
1776 				dec->dec_cs_sec = is_sec;
1777 				return (B_TRUE);
1778 			}
1779 		}
1780 	}
1781 
1782 	dec->dec_fail = ZEN_UMC_DECODE_F_NO_CS_BASE_MATCH;
1783 	return (B_FALSE);
1784 }
1785 
1786 /*
1787  * Extract the column from the address. For once, something that is almost
1788  * straightforward.
1789  */
1790 static boolean_t
zen_umc_decode_cols(const zen_umc_t * umc,zen_umc_decoder_t * dec)1791 zen_umc_decode_cols(const zen_umc_t *umc, zen_umc_decoder_t *dec)
1792 {
1793 	uint32_t cols = 0;
1794 	const umc_cs_t *cs = dec->dec_cs;
1795 
1796 	for (uint_t i = 0; i < cs->ucs_ncol; i++) {
1797 		uint32_t index;
1798 
1799 		index = cs->ucs_col_bits[i];
1800 		cols |= bitx64(dec->dec_norm_addr, index, index) << i;
1801 	}
1802 
1803 	dec->dec_dimm_col = cols;
1804 	return (B_TRUE);
1805 }
1806 
1807 /*
1808  * The row is split into two different regions. There's a low and high value,
1809  * though the high value is only present in DDR4. Unlike the column, where each
1810  * bit is spelled out, each set of row bits are contiguous (low and high are
1811  * independent).
1812  */
1813 static boolean_t
zen_umc_decode_rows(const zen_umc_t * umc,zen_umc_decoder_t * dec)1814 zen_umc_decode_rows(const zen_umc_t *umc, zen_umc_decoder_t *dec)
1815 {
1816 	uint32_t row = 0;
1817 	uint8_t inv;
1818 	const umc_cs_t *cs = dec->dec_cs;
1819 	const uint_t total_bits = cs->ucs_nrow_lo + cs->ucs_nrow_hi;
1820 	const uint_t lo_end = cs->ucs_nrow_lo + cs->ucs_row_low_bit - 1;
1821 
1822 	row = bitx64(dec->dec_norm_addr, lo_end, cs->ucs_row_low_bit);
1823 	if (cs->ucs_nrow_hi > 0) {
1824 		const uint_t hi_end = cs->ucs_nrow_hi + cs->ucs_row_hi_bit - 1;
1825 		const uint32_t hi = bitx64(dec->dec_norm_addr, hi_end,
1826 		    cs->ucs_row_hi_bit);
1827 
1828 		row |= hi << cs->ucs_nrow_lo;
1829 	}
1830 
1831 	if (dec->dec_cs_sec) {
1832 		inv = cs->ucs_inv_msbs_sec;
1833 	} else {
1834 		inv = cs->ucs_inv_msbs;
1835 	}
1836 
1837 	/*
1838 	 * We need to potentially invert the top two bits of the row address
1839 	 * based on the low two bits of the inverted register below. Note, inv
1840 	 * only has two valid bits below. So we shift them into place to perform
1841 	 * the XOR. See the big theory statement in zen_umc.c for more on why
1842 	 * this works.
1843 	 */
1844 	inv = inv << (total_bits - 2);
1845 	row = row ^ inv;
1846 
1847 	dec->dec_dimm_row = row;
1848 	return (B_TRUE);
1849 }
1850 
1851 /*
1852  * Several of the hash schemes ask us to go through and xor all the bits that
1853  * are in an address to transform it into a single bit. This implements that for
1854  * a uint32_t. This is basically a bitwise XOR reduce.
1855  */
1856 static uint8_t
zen_umc_running_xor32(const uint32_t in)1857 zen_umc_running_xor32(const uint32_t in)
1858 {
1859 	uint8_t run = 0;
1860 
1861 	for (uint_t i = 0; i < sizeof (in) * NBBY; i++) {
1862 		run ^= bitx32(in, i, i);
1863 	}
1864 
1865 	return (run);
1866 }
1867 
1868 static uint8_t
zen_umc_running_xor64(const uint64_t in)1869 zen_umc_running_xor64(const uint64_t in)
1870 {
1871 	uint8_t run = 0;
1872 
1873 	for (uint_t i = 0; i < sizeof (in) * NBBY; i++) {
1874 		run ^= bitx64(in, i, i);
1875 	}
1876 
1877 	return (run);
1878 }
1879 
1880 /*
1881  * Our goal here is to extract the number of banks and bank groups that are
1882  * used, if any.
1883  */
1884 static boolean_t
zen_umc_decode_banks(const zen_umc_t * umc,zen_umc_decoder_t * dec)1885 zen_umc_decode_banks(const zen_umc_t *umc, zen_umc_decoder_t *dec)
1886 {
1887 	uint8_t bank = 0;
1888 	const umc_cs_t *cs = dec->dec_cs;
1889 	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;
1890 
1891 	/*
1892 	 * Get an initial bank address bit and then perform any hashing if
1893 	 * bank hashing is enabled. Note, the memory controller's nbanks is the
1894 	 * total number of bank and bank group bits, hence why it's used for
1895 	 * the loop counter.
1896 	 */
1897 	for (uint_t i = 0; i < cs->ucs_nbanks; i++) {
1898 		uint32_t row_hash, col_hash;
1899 		uint8_t row_xor, col_xor;
1900 		uint_t targ = cs->ucs_bank_bits[i];
1901 		uint8_t val = bitx64(dec->dec_norm_addr, targ, targ);
1902 		const umc_bank_hash_t *bank_hash = &hash->uch_bank_hashes[i];
1903 
1904 		if ((hash->uch_flags & UMC_CHAN_HASH_F_BANK) == 0 ||
1905 		    !hash->uch_bank_hashes[i].ubh_en) {
1906 			bank |= val << i;
1907 			continue;
1908 		}
1909 
1910 		/*
1911 		 * See the big theory statement for more on this. Short form,
1912 		 * bit-wise AND the row and column, then XOR shenanigans.
1913 		 */
1914 		row_hash = dec->dec_dimm_row & bank_hash->ubh_row_xor;
1915 		col_hash = dec->dec_dimm_col & bank_hash->ubh_col_xor;
1916 		row_xor = zen_umc_running_xor32(row_hash);
1917 		col_xor = zen_umc_running_xor32(col_hash);
1918 		bank |= (row_xor ^ col_xor ^ val) << i;
1919 	}
1920 
1921 	/*
1922 	 * The bank and bank group are conjoined in the register and bit
1923 	 * definitions. Once we've calculated that, extract it.
1924 	 */
1925 	dec->dec_dimm_bank_group = bitx8(bank, cs->ucs_nbank_groups - 1, 0);
1926 	dec->dec_dimm_bank = bitx8(bank, cs->ucs_nbanks, cs->ucs_nbank_groups);
1927 	return (B_TRUE);
1928 }
1929 
1930 /*
1931  * Extract the sub-channel. If not a DDR5 based device, simply set it to zero
1932  * and return. We can't forget to hash this if required.
1933  */
1934 static boolean_t
zen_umc_decode_subchan(const zen_umc_t * umc,zen_umc_decoder_t * dec)1935 zen_umc_decode_subchan(const zen_umc_t *umc, zen_umc_decoder_t *dec)
1936 {
1937 	uint8_t subchan;
1938 	uint32_t row_hash, col_hash, bank_hash;
1939 	uint8_t row_xor, col_xor, bank_xor;
1940 	const umc_cs_t *cs = dec->dec_cs;
1941 	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;
1942 
1943 	switch (dec->dec_umc_chan->chan_type) {
1944 	case UMC_DIMM_T_DDR5:
1945 	case UMC_DIMM_T_LPDDR5:
1946 		break;
1947 	default:
1948 		dec->dec_dimm_subchan = 0;
1949 		return (B_TRUE);
1950 	}
1951 
1952 	subchan = bitx64(dec->dec_norm_addr, cs->ucs_subchan, cs->ucs_subchan);
1953 	if ((hash->uch_flags & UMC_CHAN_HASH_F_PC) == 0 ||
1954 	    !hash->uch_pc_hash.uph_en) {
1955 		dec->dec_dimm_subchan = subchan;
1956 		return (B_TRUE);
1957 	}
1958 
1959 	row_hash = dec->dec_dimm_row & hash->uch_pc_hash.uph_row_xor;
1960 	col_hash = dec->dec_dimm_col & hash->uch_pc_hash.uph_col_xor;
1961 	bank_hash = dec->dec_dimm_bank & hash->uch_pc_hash.uph_bank_xor;
1962 	row_xor = zen_umc_running_xor32(row_hash);
1963 	col_xor = zen_umc_running_xor32(col_hash);
1964 	bank_xor = zen_umc_running_xor32(bank_hash);
1965 
1966 	dec->dec_dimm_subchan = subchan ^ row_xor ^ col_xor ^ bank_xor;
1967 	return (B_TRUE);
1968 }
1969 
1970 /*
1971  * Note that we have normalized the RM bits between the primary and secondary
1972  * base/mask registers so that way even though the DDR5 controller always uses
1973  * the same RM selection bits, it works in a uniform way for both DDR4 and DDR5.
1974  */
1975 static boolean_t
zen_umc_decode_rank_mul(const zen_umc_t * umc,zen_umc_decoder_t * dec)1976 zen_umc_decode_rank_mul(const zen_umc_t *umc, zen_umc_decoder_t *dec)
1977 {
1978 	uint8_t rm = 0;
1979 	const umc_cs_t *cs = dec->dec_cs;
1980 	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;
1981 
1982 	for (uint_t i = 0; i < cs->ucs_nrm; i++) {
1983 		uint8_t index = cs->ucs_rm_bits[i];
1984 		uint8_t bit = bitx64(dec->dec_norm_addr, index, index);
1985 
1986 		if ((hash->uch_flags & UMC_CHAN_HASH_F_RM) != 0 &&
1987 		    hash->uch_rm_hashes[i].uah_en) {
1988 			uint64_t norm_mask = dec->dec_norm_addr &
1989 			    hash->uch_rm_hashes[i].uah_addr_xor;
1990 			uint8_t norm_hash = zen_umc_running_xor64(norm_mask);
1991 			bit = bit ^ norm_hash;
1992 		}
1993 
1994 		rm |= bit << i;
1995 	}
1996 
1997 	dec->dec_dimm_rm = rm;
1998 	return (B_TRUE);
1999 }
2000 
2001 /*
2002  * Go through and determine the actual chip-select activated. This is subject to
2003  * hashing. Note, we first constructed a logical chip-select value based on
2004  * which of the four base/mask registers in the UMC we activated for the
2005  * channel. That basically seeded the two bit value we start with.
2006  */
2007 static boolean_t
zen_umc_decode_chipsel(const zen_umc_t * umc,zen_umc_decoder_t * dec)2008 zen_umc_decode_chipsel(const zen_umc_t *umc, zen_umc_decoder_t *dec)
2009 {
2010 	uint8_t csno = 0;
2011 	const umc_cs_t *cs = dec->dec_cs;
2012 	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;
2013 
2014 	for (uint_t i = 0; i < ZEN_UMC_MAX_CS_BITS; i++) {
2015 		uint8_t bit = bitx8(dec->dec_log_csno, i, i);
2016 		if ((hash->uch_flags & UMC_CHAN_HASH_F_CS) != 0 &&
2017 		    hash->uch_cs_hashes[i].uah_en) {
2018 			uint64_t mask = dec->dec_norm_addr &
2019 			    hash->uch_cs_hashes[i].uah_addr_xor;
2020 			uint8_t rxor = zen_umc_running_xor64(mask);
2021 			bit = bit ^ rxor;
2022 		}
2023 		csno |= bit << i;
2024 	}
2025 
2026 	/*
2027 	 * It is not entirely clear what the circumstances are that we need to
2028 	 * apply the chip-select xor. Right now we always apply it. This only
2029 	 * exists on a few DDR5 SoCs, it seems, and we zero out other cases to
2030 	 * try and have a uniform and reasonable path. This tells us what the
2031 	 * absolute chip-select is in the channel. We record this for debugging
2032 	 * purposes and to derive the DIMM and CS.
2033 	 */
2034 	dec->dec_chan_csno = (csno ^ cs->ucs_cs_xor) & 0x3;
2035 
2036 	/*
2037 	 * Now that we actually know which chip-select we're targeting, go back
2038 	 * and actual indicate which DIMM we'll go out to and what chip-select
2039 	 * it is relative to the DIMM. This may have changed out due to CS
2040 	 * hashing. As such we have to now snapshot our final DIMM and
2041 	 * chip-select.
2042 	 */
2043 	dec->dec_dimm_no = dec->dec_chan_csno >> 1;
2044 	dec->dec_dimm_csno = dec->dec_chan_csno % 2;
2045 	return (B_TRUE);
2046 }
2047 
2048 /*
2049  * Initialize the decoder state. We do this by first zeroing it all and then
2050  * setting various result addresses to the UINTXX_MAX that is appropriate. These
2051  * work as better sentinel values than zero; however, we always zero the
2052  * structure to be defensive, cover pointers, etc.
2053  */
2054 static void
zen_umc_decoder_init(zen_umc_decoder_t * dec)2055 zen_umc_decoder_init(zen_umc_decoder_t *dec)
2056 {
2057 	bzero(dec, sizeof (*dec));
2058 
2059 	dec->dec_pa = dec->dec_ilv_pa = UINT64_MAX;
2060 	dec->dec_df_ruleno = UINT32_MAX;
2061 	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan =
2062 	    dec->dec_ilv_fabid = dec->dec_log_fabid = dec->dec_remap_comp =
2063 	    dec->dec_targ_fabid = UINT32_MAX;
2064 	dec->dec_umc_ruleno = UINT32_MAX;
2065 	dec->dec_norm_addr = UINT64_MAX;
2066 	dec->dec_dimm_col = dec->dec_dimm_row = UINT32_MAX;
2067 	dec->dec_log_csno = dec->dec_dimm_bank = dec->dec_dimm_bank_group =
2068 	    dec->dec_dimm_subchan = dec->dec_dimm_rm = dec->dec_chan_csno =
2069 	    dec->dec_dimm_no = dec->dec_dimm_csno = UINT8_MAX;
2070 }
2071 
2072 boolean_t
zen_umc_decode_pa(const zen_umc_t * umc,const uint64_t pa,zen_umc_decoder_t * dec)2073 zen_umc_decode_pa(const zen_umc_t *umc, const uint64_t pa,
2074     zen_umc_decoder_t *dec)
2075 {
2076 	zen_umc_decoder_init(dec);
2077 	dec->dec_pa = pa;
2078 
2079 	/*
2080 	 * Before we proceed through decoding, the first thing we should try to
2081 	 * do is verify that this is even something that could be DRAM.
2082 	 */
2083 	if (!zen_umc_decode_is_dram(umc, dec)) {
2084 		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
2085 		return (B_FALSE);
2086 	}
2087 
2088 	/*
2089 	 * The very first thing that we need to do is find a data fabric rule
2090 	 * that corresponds to this memory address. This will be used to
2091 	 * determine which set of rules for interleave and related we actually
2092 	 * should then use.
2093 	 */
2094 	if (!zen_umc_decode_find_df_rule(umc, dec)) {
2095 		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
2096 		return (B_FALSE);
2097 	}
2098 
2099 	/*
2100 	 * Now that we have a DF rule, we must take a more involved step of
2101 	 * mapping to a given CS, e.g. a specific UMC channel. This will tell us
2102 	 * the socket and die as well. This takes care of all the interleaving
2103 	 * and remapping and produces a target fabric ID.
2104 	 */
2105 	if (!zen_umc_decode_sysaddr_to_csid(umc, dec)) {
2106 		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
2107 		return (B_FALSE);
2108 	}
2109 
2110 	/*
2111 	 * With that target ID known, now actually map this to a corresponding
2112 	 * UMC.
2113 	 */
2114 	if (!zen_umc_decode_find_umc_rule(umc, dec)) {
2115 		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
2116 		return (B_FALSE);
2117 	}
2118 
2119 	/*
2120 	 * With the target and corresponding rules and offset information,
2121 	 * actually perform normalization.
2122 	 */
2123 	if (!zen_umc_decode_sysaddr_to_norm(umc, dec)) {
2124 		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
2125 		return (B_FALSE);
2126 	}
2127 
2128 	/*
2129 	 * Finally, we somehow managed to actually construct a normalized
2130 	 * address. Now we must begin the act of transforming this channel
2131 	 * address into something that makes sense to address a DIMM. To start
2132 	 * with determine which logical chip-select, which determines where we
2133 	 * source all our data to use.
2134 	 */
2135 	if (!zen_umc_decode_find_cs(umc, dec)) {
2136 		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
2137 		return (B_FALSE);
2138 	}
2139 
2140 	/*
2141 	 * Now that we have the logical chip-select matched that we're sourcing
2142 	 * our data from, the next this is a bit more involved: we need to
2143 	 * extract the row, column, rank/rank multiplication, bank, and bank
2144 	 * group out of all this, while taking into account all of our hashes.
2145 	 *
2146 	 * To do this, we begin by first calculating the row and column as those
2147 	 * will be needed to determine some of our other values here.
2148 	 */
2149 	if (!zen_umc_decode_rows(umc, dec)) {
2150 		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
2151 		return (B_FALSE);
2152 	}
2153 
2154 	if (!zen_umc_decode_cols(umc, dec)) {
2155 		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
2156 		return (B_FALSE);
2157 	}
2158 
2159 	/*
2160 	 * Now that we have the rows and columns we can go through and determine
2161 	 * the bank and bank group. This depends on the above.
2162 	 */
2163 	if (!zen_umc_decode_banks(umc, dec)) {
2164 		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
2165 		return (B_FALSE);
2166 	}
2167 
2168 	/*
2169 	 * If we have a DDR5 generation DIMM then we need to consider the
2170 	 * subchannel. This doesn't exist in DDR4 systems (the function handles
2171 	 * this reality). Because of potential hashing, this needs to come after
2172 	 * the row, column, and bank have all been determined.
2173 	 */
2174 	if (!zen_umc_decode_subchan(umc, dec)) {
2175 		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
2176 		return (B_FALSE);
2177 	}
2178 
2179 	/*
2180 	 * Time for the last two pieces here: the actual chip select used and
2181 	 * then figuring out which rank, taking into account rank
2182 	 * multiplication. Don't worry, these both have hashing opportunities.
2183 	 */
2184 	if (!zen_umc_decode_rank_mul(umc, dec)) {
2185 		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
2186 		return (B_FALSE);
2187 	}
2188 
2189 	if (!zen_umc_decode_chipsel(umc, dec)) {
2190 		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
2191 		return (B_FALSE);
2192 	}
2193 
2194 	/*
2195 	 * Somehow, that's it.
2196 	 */
2197 	return (B_TRUE);
2198 }
2199