/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

/*
 * Zen UMC Decoding logic. See zen_umc.c for an overview of everything. This
 * implements shared userland/kernel decoding.
 */

#include "zen_umc.h"

#ifndef _KERNEL
#include <strings.h>
#endif

/*
 * Address constants.
 */
#define	ZEN_UMC_TOM2_START	0x100000000ULL
#define	ZEN_UMC_TOM2_RSVD_BEGIN	0xfd00000000ULL
#define	ZEN_UMC_TOM2_RSVD_END	0x10000000000ULL

/*
 * COD based hashing constants.
 */
#define	ZEN_UMC_COD_NBITS	3
#define	ZEN_UMC_NPS_MOD_NBITS	3

/*
 * Enumeration that represents which parts of the NPS 1K/2K non-power of 2 hash
 * we should use. These are ordered such that their indexes correspond with the
 * 'hashes' array indexes used in zen_umc_decode_ileave_nps_k_mod().
 */
typedef enum {
	ZEN_UMC_NP2_K_HASH_8 = 0,
	ZEN_UMC_NP2_K_HASH_9,
	ZEN_UMC_NP2_K_HASH_12,
	ZEN_UMC_NP2_K_HASH_13
} zen_umc_np2_k_hash_t;

typedef struct {
	/*
	 * Indicates what the type of this rule is.
	 */
	df_chan_ileave_t	zukr_type;
	/*
	 * This is the modulus that this rule uses.
	 */
	uint32_t	zukr_mod;
	/*
	 * Indicates that this rule requires socket interleaving. Otherwise we
	 * expect no socket interleaving to be enabled.
	 */
	boolean_t	zukr_sock;
	/*
	 * This is the 'high' portion of the original address that is used as
	 * part of the division and modulus logic when we take it. This bit is
	 * inclusive, e.g. a value of 12 indicates we want addr[63:12].
	 */
	uint32_t	zukr_high;
	/*
	 * This indicates at what point in the modulus address the high bits
	 * should arrive.
	 */
	uint32_t	zukr_mod_shift;
	/*
	 * This indicates how we should fill the remaining bits in the modulus
	 * address. This is either zero filled or an original address bit. Only
	 * address bits 8 or 9 are ever used so we cheat and treat a zero here
	 * as zero filled. Only the first zukr_mod_shift bits will be
	 * considered. This and zukr_mod_shift are used prior to the modulus
	 * calculation.
	 */
	uint32_t	zukr_mod_fill[5];
	/*
	 * The next series of values defines how to construct the channel. The
	 * channel is always made up of some number of bits from the modulus
	 * value and then optionally some of the hash bits. The first value
	 * indicates how many bits to shift the resulting modulus value by. Any
	 * bit that it is shifted over by must be filled with a hashed value.
	 * Which hash to use for a given bit is indicated by its starting
	 * address bit number.
	 */
	uint32_t	zukr_chan_mod_shift;
	zen_umc_np2_k_hash_t	zukr_chan_fill[2];
	/*
	 * Next, it's time to describe how to construct the normalized address.
	 * There is a portion of it which is divided by the modulus. This is
	 * always going to be the high bits, but sometimes includes additional
	 * lower parts of the physical address ORed in. The first value
	 * indicates the starting address bit; the second indicates how many
	 * consecutive address bits should be included.
	 */
	uint32_t	zukr_div_addr;
	uint32_t	zukr_div_naddr;
	/*
	 * Finally the middle portion of the normalized address.
	 */
	uint32_t	zukr_norm_addr;
	uint32_t	zukr_norm_naddr;
} zen_umc_np2_k_rule_t;

const zen_umc_np2_k_rule_t zen_umc_np2_k_rules[] = { {
	.zukr_type = DF_CHAN_ILEAVE_NPS4_3CH_1K,
	.zukr_mod = 3,
	.zukr_high = 12,
	.zukr_mod_shift = 2,
	.zukr_mod_fill = { 8, 9 },
	.zukr_chan_mod_shift = 0,
	.zukr_div_addr = 8,
	.zukr_div_naddr = 2,
	.zukr_norm_addr = 10,
	.zukr_norm_naddr = 2
}, {
	.zukr_type = DF_CHAN_ILEAVE_NPS4_3CH_2K,
	.zukr_mod = 3,
	.zukr_high = 12,
	.zukr_mod_shift = 2,
	.zukr_mod_fill = { 0, 8 },
	.zukr_chan_mod_shift = 0,
	.zukr_div_addr = 8,
	.zukr_div_naddr = 1,
	.zukr_norm_addr = 9,
	.zukr_norm_naddr = 3
}, {
	.zukr_type = DF_CHAN_ILEAVE_NPS2_6CH_1K,
	.zukr_mod = 3,
	.zukr_high = 12,
	.zukr_mod_shift = 2,
	.zukr_mod_fill = { 0, 9 },
	.zukr_chan_mod_shift = 1,
	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8 },
	.zukr_div_addr = 9,
	.zukr_div_naddr = 1,
	.zukr_norm_addr = 10,
	.zukr_norm_naddr = 2
}, {
	.zukr_type = DF_CHAN_ILEAVE_NPS2_6CH_2K,
	.zukr_mod = 3,
	.zukr_high = 12,
	.zukr_mod_shift = 2,
	.zukr_mod_fill = { 0, 0 },
	.zukr_chan_mod_shift = 1,
	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8 },
	.zukr_div_naddr = 0,
	.zukr_norm_addr = 9,
	.zukr_norm_naddr = 3
}, {
	.zukr_type = DF_CHAN_ILEAVE_NPS1_12CH_1K,
	.zukr_mod = 3,
	.zukr_high = 12,
	.zukr_mod_shift = 2,
	.zukr_mod_fill = { 0, 0 },
	.zukr_chan_mod_shift = 2,
	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8, ZEN_UMC_NP2_K_HASH_9 },
	.zukr_div_naddr = 0,
	.zukr_norm_addr = 10,
	.zukr_norm_naddr = 2
}, {
	.zukr_type = DF_CHAN_ILEAVE_NPS1_12CH_2K,
	.zukr_mod = 3,
	.zukr_high = 13,
	.zukr_mod_shift = 3,
	.zukr_mod_fill = { 0, 0, 0 },
	.zukr_chan_mod_shift = 2,
	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8, ZEN_UMC_NP2_K_HASH_12 },
	.zukr_div_naddr = 0,
	.zukr_norm_addr = 9,
	.zukr_norm_naddr = 3
}, {
	.zukr_type = DF_CHAN_ILEAVE_NPS0_24CH_1K,
	.zukr_mod = 3,
	.zukr_sock = B_TRUE,
	.zukr_high = 13,
	.zukr_mod_shift = 3,
	.zukr_mod_fill = { 0, 0, 0 },
	.zukr_chan_mod_shift = 2,
	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_9, ZEN_UMC_NP2_K_HASH_12 },
	.zukr_div_naddr = 0,
	.zukr_norm_addr = 10,
	.zukr_norm_naddr = 2
}, {
	.zukr_type = DF_CHAN_ILEAVE_NPS0_24CH_2K,
	.zukr_mod = 3,
	.zukr_sock = B_TRUE,
	.zukr_high = 14,
	.zukr_mod_shift = 4,
	.zukr_mod_fill = { 0, 0, 0, 0 },
	.zukr_chan_mod_shift = 2,
	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_12, ZEN_UMC_NP2_K_HASH_13 },
	.zukr_div_naddr = 0,
	.zukr_norm_addr = 9,
	.zukr_norm_naddr = 3
}, {
	.zukr_type = DF_CHAN_ILEAVE_NPS2_5CH_1K,
	.zukr_mod = 5,
	.zukr_high = 12,
	.zukr_mod_shift = 2,
	.zukr_mod_fill = { 8, 9 },
	.zukr_chan_mod_shift = 0,
	.zukr_div_addr = 8,
	.zukr_div_naddr = 2,
	.zukr_norm_addr = 10,
	.zukr_norm_naddr = 2
}, {
	.zukr_type = DF_CHAN_ILEAVE_NPS2_5CH_2K,
	.zukr_mod = 5,
	.zukr_high = 12,
	.zukr_mod_shift = 2,
	.zukr_mod_fill = { 0, 8 },
	.zukr_chan_mod_shift = 0,
	.zukr_div_addr = 8,
	.zukr_div_naddr = 1,
	.zukr_norm_addr = 9,
	.zukr_norm_naddr = 3
}, {
	.zukr_type = DF_CHAN_ILEAVE_NPS1_10CH_1K,
	.zukr_mod = 5,
	.zukr_high = 12,
	.zukr_mod_shift = 2,
	.zukr_mod_fill = { 0, 9 },
	.zukr_chan_mod_shift = 1,
	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8 },
	.zukr_div_addr = 9,
	.zukr_div_naddr = 1,
	.zukr_norm_addr = 10,
	.zukr_norm_naddr = 2
}, {
	.zukr_type = DF_CHAN_ILEAVE_NPS1_10CH_2K,
	.zukr_mod = 5,
	.zukr_high = 12,
	.zukr_mod_shift = 2,
	.zukr_mod_fill = { 0, 0 },
	.zukr_chan_mod_shift = 1,
	.zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8 },
	.zukr_div_naddr = 0,
	.zukr_norm_addr = 9,
	.zukr_norm_naddr = 3
} };

/*
 * We want to apply some initial heuristics to determine if a physical address
 * is DRAM before we proceed because of the MMIO hole and related. The DRAM
 * ranges can overlap with these system reserved ranges so we have to manually
 * check these. Effectively this means that we have a few valid ranges:
 *
 *  o [ 0, TOM )
 *  o [ 4 GiB, TOM2 )
 *
 * However, the region above 4 GiB runs into trouble depending on size. There
 * is a 12 GiB system reserved address region right below 1 TiB. So it really
 * turns into the following when we have more than 1 TiB of DRAM:
 *
 *  o [ 0, TOM )
 *  o [ 4 GiB, 1 TiB - 12 GiB )
 *  o [ 1 TiB, TOM2 )
 *
 * Note, this does not currently scan MTRRs or MMIO rules for what might be
 * redirected to MMIO.
 */
static boolean_t
zen_umc_decode_is_dram(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	if (dec->dec_pa < umc->umc_tom) {
		return (B_TRUE);
	}

	if (dec->dec_pa >= umc->umc_tom2) {
		dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM;
		return (B_FALSE);
	}

	/*
	 * If the address is in the reserved hole around 1 TiB, do not proceed.
	 */
	if (dec->dec_pa >= ZEN_UMC_TOM2_RSVD_BEGIN &&
	    dec->dec_pa < ZEN_UMC_TOM2_RSVD_END) {
		dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM;
		return (B_FALSE);
	}

	/*
	 * Now that we've validated we're not in the hole, check to see if we're
	 * actually in a valid region for TOM2.
	 */
	if (dec->dec_pa >= ZEN_UMC_TOM2_START &&
	    dec->dec_pa < umc->umc_tom2) {
		return (B_TRUE);
	}

	/*
	 * At this point we have eliminated all known DRAM regions described by
	 * TOM and TOM2, so we have to conclude that whatever we're looking at
	 * is now not part of DRAM.
	 */
	dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM;
	return (B_FALSE);
}
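
/*
 * To make the ranges above concrete, consider an illustrative configuration
 * (assumed values, not from any real machine) with TOM = 2 GiB (0x80000000)
 * and TOM2 = 1.5 TiB (0x18080000000):
 *
 *  o PA 0x7fffffff is below TOM and decodes as DRAM via the first check.
 *  o PA 0xfd00000000 falls inside [ ZEN_UMC_TOM2_RSVD_BEGIN,
 *    ZEN_UMC_TOM2_RSVD_END ) and is rejected even though it is below TOM2.
 *  o PA 0x10000000000 (1 TiB) is exactly ZEN_UMC_TOM2_RSVD_END, so it is past
 *    the hole and below TOM2, and decodes as DRAM.
 */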

/*
 * In our first step of decoding, we need to go through and take a physical
 * address and figure out which initial DF rule applies. This rule will then
 * be used to figure out which target on the data fabric we should be going to
 * and what interleaving rules apply.
 *
 * Our DRAM rule may reflect that the DRAM hole is active. In this case the
 * specified range in the rule will be larger than the actual amount of DRAM
 * present. MMIO accesses take priority over DRAM accesses in the core and
 * therefore the MMIO portion of the rule is not actually decoded. When trying
 * to match a rule we do not need to worry about that and can just look whether
 * our physical address matches a rule. We will take into account whether
 * hoisting should adjust the address when we translate from a system address
 * to a normal address (e.g. an address in the channel), which will be done in
 * a subsequent step. If an address is in the hole, that has already been
 * accounted for.
 *
 * While gathering information, we have all the DRAM rules for a given CCM that
 * corresponds to a CPU core. This allows us to review all DRAM rules in one
 * place rather than walking through what's been assigned to each UMC instance,
 * which only has the rules that are directed towards that particular channel
 * and matter for determining channel offsets.
 */
static boolean_t
zen_umc_decode_find_df_rule(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const zen_umc_df_t *df = &umc->umc_dfs[0];

	for (uint_t i = 0; i < df->zud_dram_nrules; i++) {
		const df_dram_rule_t *rule = &df->zud_rules[i];

		/*
		 * If this rule is not enabled, skip it.
		 */
		if ((rule->ddr_flags & DF_DRAM_F_VALID) == 0)
			continue;

		if (dec->dec_pa >= rule->ddr_base &&
		    dec->dec_pa < rule->ddr_limit) {
			dec->dec_df_ruleno = i;
			dec->dec_df_rule = rule;
			dec->dec_df_rulesrc = df;
			return (B_TRUE);
		}
	}

	dec->dec_fail = ZEN_UMC_DECODE_F_NO_DF_RULE;
	return (B_FALSE);
}

/*
 * This function takes care of the common logic of adjusting an address by the
 * base value in the rule and determining if we need to apply the DRAM hole or
 * not. This function is used in two different places:
 *
 *  o As part of adjusting the system address to construct the interleave
 *    address for DFv4 and Zen 3 based 6-channel hashing (see
 *    zen_umc_determine_ileave_addr() below).
 *  o As part of adjusting the system address at the beginning of normalization
 *    to a channel address.
 *
 * One thing to highlight is that the same adjustment we make in the first case
 * applies to a subset of things for interleaving; however, it applies to
 * everything when normalizing.
 */
static boolean_t
zen_umc_adjust_dram_addr(const zen_umc_t *umc, zen_umc_decoder_t *dec,
    uint64_t *addrp, zen_umc_decode_failure_t errno)
{
	const uint64_t init_addr = *addrp;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_df_t *df = dec->dec_df_rulesrc;
	uint64_t mod_addr = init_addr;

	ASSERT3U(init_addr, >=, rule->ddr_base);
	ASSERT3U(init_addr, <, rule->ddr_limit);
	mod_addr -= rule->ddr_base;

	/*
	 * Determine if the hole applies to this rule.
	 */
	if ((rule->ddr_flags & DF_DRAM_F_HOLE) != 0 &&
	    (df->zud_flags & ZEN_UMC_DF_F_HOLE_VALID) != 0 &&
	    init_addr >= ZEN_UMC_TOM2_START) {
		uint64_t hole_size;
		hole_size = ZEN_UMC_TOM2_START -
		    umc->umc_dfs[0].zud_hole_base;
		if (mod_addr < hole_size) {
			dec->dec_fail = errno;
			dec->dec_fail_data = dec->dec_df_ruleno;
			return (B_FALSE);
		}

		mod_addr -= hole_size;
	}

	*addrp = mod_addr;
	return (B_TRUE);
}
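
/*
 * Illustrative example of the adjustment above (assumed values): take a rule
 * with ddr_base = 0 that has DF_DRAM_F_HOLE set, and a DF with a valid hole
 * whose zud_hole_base is 0xc0000000 (3 GiB). Then hole_size is 4 GiB - 3 GiB
 * = 0x40000000. An input address of 0x100000000 (4 GiB) is at or above
 * ZEN_UMC_TOM2_START, so it is reduced by the hole: the result is
 * 0x100000000 - 0x40000000 = 0xc0000000, i.e. it lands right where DRAM
 * resumed being contiguous.
 */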

/*
 * Take care of constructing the address we need to use for determining the
 * interleaving target fabric id. See the big theory statement in zen_umc.c for
 * more on this.
 */
static boolean_t
zen_umc_determine_ileave_addr(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;

	if ((umc->umc_df_rev <= DF_REV_3 &&
	    rule->ddr_chan_ileave != DF_CHAN_ILEAVE_6CH) ||
	    umc->umc_df_rev >= DF_REV_4D2) {
		dec->dec_ilv_pa = dec->dec_pa;
		return (B_TRUE);
	}

	dec->dec_ilv_pa = dec->dec_pa;
	if (!zen_umc_adjust_dram_addr(umc, dec, &dec->dec_ilv_pa,
	    ZEN_UMC_DECODE_F_ILEAVE_UNDERFLOW)) {
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * This is a simple interleaving case where we simply extract bits. No hashing
 * required! Per zen_umc.c, from lowest to highest, we have channel, die, and
 * then socket bits.
 */
static boolean_t
zen_umc_decode_ileave_nohash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit, ndie_bit, nsock_bit, addr_bit;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	nsock_bit = rule->ddr_sock_ileave_bits;
	ndie_bit = rule->ddr_die_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
		nchan_bit = 0;
		break;
	case DF_CHAN_ILEAVE_2CH:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_4CH:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_8CH:
		nchan_bit = 3;
		break;
	case DF_CHAN_ILEAVE_16CH:
		nchan_bit = 4;
		break;
	case DF_CHAN_ILEAVE_32CH:
		nchan_bit = 5;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * Zero all of these out in case no bits are dedicated to this purpose.
	 * In those cases, then the value for this is always zero.
	 */
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	addr_bit = rule->ddr_addr_start;
	if (nchan_bit > 0) {
		dec->dec_ilv_chan = bitx64(dec->dec_ilv_pa,
		    addr_bit + nchan_bit - 1, addr_bit);
		addr_bit += nchan_bit;
	}

	if (ndie_bit > 0) {
		dec->dec_ilv_die = bitx64(dec->dec_ilv_pa,
		    addr_bit + ndie_bit - 1, addr_bit);
		addr_bit += ndie_bit;
	}

	if (nsock_bit > 0) {
		dec->dec_ilv_sock = bitx64(dec->dec_ilv_pa,
		    addr_bit + nsock_bit - 1, addr_bit);
		addr_bit += nsock_bit;
	}

	return (B_TRUE);
}
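
/*
 * Worked example with assumed values: an 8-channel (DF_CHAN_ILEAVE_8CH) rule
 * with one socket interleave bit, no die bits, and ddr_addr_start = 8. The
 * channel comes from PA[10:8] and the socket from PA[11]. For an interleave
 * address of 0xabc (0b1010_1011_1100), PA[10:8] = 0b010, so the channel is 2,
 * and PA[11] = 1, so the socket is 1.
 */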

/*
 * Perform the Zen 2/Zen 3 "COD" based hashing. See the zen_umc.c interleaving
 * section of the big theory statement for an overview of how this works.
 */
static boolean_t
zen_umc_decode_ileave_cod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	/*
	 * The order of bits here is defined by AMD. Yes, we do use the rule's
	 * address bit first and then skip to bit 12 for the second hash bit.
	 */
	const uint32_t addr_bits[3] = { rule->ddr_addr_start, 12, 13 };

	if (rule->ddr_sock_ileave_bits != 0 || rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_COD_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_COD4_2CH:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_COD2_4CH:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_COD1_8CH:
		nchan_bit = 3;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;

	/*
	 * Proceed to calculate the address hash based on the number of bits
	 * that we have been told to use based on the DF rule. Use the flags in
	 * the rule to determine which additional address ranges to hash in.
	 */
	for (uint_t i = 0; i < nchan_bit; i++) {
		uint8_t hash = 0;

		hash = bitx64(dec->dec_ilv_pa, addr_bits[i], addr_bits[i]);
		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 16 + i, 16 + i);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 21 + i, 21 + i);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 30 + i, 30 + i);
			hash ^= val;
		}

		dec->dec_ilv_chan |= hash << i;
	}

	return (B_TRUE);
}
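
/*
 * Worked example with assumed values: a COD1_8CH rule with ddr_addr_start = 8
 * and only DF_DRAM_F_HASH_21_23 set. Each channel bit i is then
 * PA[addr_bits[i]] ^ PA[21 + i]. For PA = 0x200100 (bits 8 and 21 set):
 * channel bit 0 = PA[8] ^ PA[21] = 1 ^ 1 = 0, bit 1 = PA[12] ^ PA[22] = 0,
 * and bit 2 = PA[13] ^ PA[23] = 0, so the access targets channel 0. Note how
 * the 2 MiB-granular bit cancels the low-order bit, which is how the hash
 * spreads otherwise-aligned access streams across channels.
 */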

/*
 * Common logic to perform hashing across the NPS, NPS 1K, and NPS 2K variants.
 */
static void
zen_umc_decode_ileave_nps_common(zen_umc_decoder_t *dec,
    const uint32_t *addr_bits, const uint32_t *adj, uint32_t nsock_bits,
    uint32_t nchan_bits, boolean_t df4p0)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;

	for (uint32_t i = 0; i < nchan_bits + nsock_bits; i++) {
		uint8_t hash = 0;

		hash = bitx64(dec->dec_ilv_pa, addr_bits[i], addr_bits[i]);
		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 16 + adj[i],
			    16 + adj[i]);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 21 + adj[i],
			    21 + adj[i]);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 30 + adj[i], 30 +
			    adj[i]);
			hash ^= val;
		}

		/*
		 * While 1T is only supported in the NPS 1K/2K variant, rule
		 * normalization means this won't be set in the plain NPS case.
		 */
		if ((rule->ddr_flags & DF_DRAM_F_HASH_40_42) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 40 + adj[i],
			    40 + adj[i]);
			hash ^= val;
		}

		/*
		 * If this is the first bit and we're not doing socket
		 * interleaving, then we need to add bit 14 to the running hash.
		 * This is only true for a strict DF v4.0 NPS style hash. We
		 * don't perform this for the 1K/2K variant.
		 */
		if (i == 0 && nsock_bits == 0 && df4p0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 14, 14);
			hash ^= val;
		}

		/*
		 * If socket interleaving is going on we need to store the first
		 * bit as the socket hash and then redirect the remaining bits
		 * to the channel, taking into account that the shift will be
		 * adjusted as a result.
		 */
		if (nsock_bits > 0) {
			if (i == 0) {
				dec->dec_ilv_sock = hash;
			} else {
				dec->dec_ilv_chan |= hash << (i - 1);
			}
		} else {
			dec->dec_ilv_chan |= hash << i;
		}
	}
}
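
/*
 * To make the socket redirect above concrete (assumed values): with
 * nsock_bits = 1 and nchan_bits = 2, the loop runs for i = 0, 1, and 2. The
 * i = 0 hash becomes the socket bit, while the i = 1 and i = 2 hashes become
 * channel bits 0 and 1. With nsock_bits = 0 and the same nchan_bits, only
 * i = 0 and i = 1 run and hash i simply becomes channel bit i.
 */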


/*
 * This implements the standard NPS hash for power of 2 based channel
 * configurations that is found in DFv4. For more information, please see the
 * interleaving portion of the zen_umc.c big theory statement.
 */
static boolean_t
zen_umc_decode_ileave_nps(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit, nsock_bit;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	/*
	 * The order of bits here is defined by AMD. Yes, this starts with the
	 * defined address bit and then skips to bit 12.
	 */
	const uint32_t addr_bits[4] = { rule->ddr_addr_start, 12, 13, 14 };
	const uint32_t adj[4] = { 0, 1, 2, 3 };

	if (rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	nsock_bit = rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_2CH:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_NPS2_4CH:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_NPS1_8CH:
		nchan_bit = 3;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	ASSERT3U(nchan_bit + nsock_bit, <=, 4);
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;

	zen_umc_decode_ileave_nps_common(dec, addr_bits, adj, nsock_bit,
	    nchan_bit, B_TRUE);
	return (B_TRUE);
}

/*
 * This implements the Zen 5 (really DF 4D2) NPS variants that work on both 1K
 * and 2K hashing.
 */
static boolean_t
zen_umc_decode_ileave_nps_k(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit, nsock_bit;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const uint32_t addr_bits_1k[5] = { rule->ddr_addr_start, 9, 12, 13,
	    14 };
	const uint32_t addr_bits_2k[4] = { rule->ddr_addr_start, 12, 13, 14 };
	const uint32_t adj_1k[5] = { 0, 1, 2, 3, 4 };
	const uint32_t adj_2k[4] = { 0, 2, 3, 4 };
	const uint32_t *addr_bits;
	const uint32_t *adj;

	if (rule->ddr_die_ileave_bits != 0 || rule->ddr_addr_start != 8) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	nsock_bit = rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
		nchan_bit = 3;
		break;
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
		nchan_bit = 4;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
		ASSERT3U(nchan_bit + nsock_bit, <=, 5);
		addr_bits = addr_bits_1k;
		adj = adj_1k;
		break;
	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
		ASSERT3U(nchan_bit + nsock_bit, <=, 4);
		addr_bits = addr_bits_2k;
		adj = adj_2k;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	zen_umc_decode_ileave_nps_common(dec, addr_bits, adj, nsock_bit,
	    nchan_bit, B_FALSE);
	return (B_TRUE);
}

/*
 * This implements the logic to perform the Zen 3 6ch special hash. It's worth
 * calling out that unlike all other hash functions, this does not support the
 * use of the DF_DRAM_F_HASH_16_18 flag.
 */
static void
zen_umc_decode_hash_zen3_6ch(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[3])
{
	uint32_t addr_bit = rule->ddr_addr_start;
	/*
	 * Yes, we use these in a weird order. No, there is no 64K.
	 */
	const uint32_t bits_2M[3] = { 23, 21, 22 };
	const uint32_t bits_1G[3] = { 32, 30, 31 };

	hashes[0] = hashes[1] = hashes[2] = 0;
	for (uint_t i = 0; i < ZEN_UMC_COD_NBITS; i++) {
		hashes[i] = bitx64(pa, addr_bit + i, addr_bit + i);
		if (i == 0) {
			uint8_t val = bitx64(pa, addr_bit + 3, addr_bit + 3);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(pa, bits_2M[i], bits_2M[i]);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(pa, bits_1G[i], bits_1G[i]);
			hashes[i] ^= val;
		}
	}
}

/*
 * Perform Zen 3 6-channel hashing. This is pretty weird compared to others.
 * See the zen_umc.c big theory statement for the thorny details.
 */
static boolean_t
zen_umc_decode_ileave_zen3_6ch(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t hashes[3] = { 0 };
	const df_dram_rule_t *rule = dec->dec_df_rule;
	uint32_t addr_bit = rule->ddr_addr_start;

	if (rule->ddr_sock_ileave_bits != 0 || rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_COD_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	zen_umc_decode_hash_zen3_6ch(rule, dec->dec_ilv_pa, hashes);
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	dec->dec_ilv_chan = hashes[0];
	if (hashes[1] == 1 && hashes[2] == 1) {
		uint64_t mod_addr = dec->dec_ilv_pa >> (addr_bit + 3);
		dec->dec_ilv_chan |= (mod_addr % 3) << 1;
	} else {
		dec->dec_ilv_chan |= hashes[1] << 1;
		dec->dec_ilv_chan |= hashes[2] << 2;
	}

	return (B_TRUE);
}
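
/*
 * Worked example with assumed values: ddr_addr_start = 8 and no 2M/1G hash
 * flags set, so hashes[i] is just PA[8 + i], except that hashes[0] also XORs
 * in PA[11]. For PA = 0x600 (bits 9 and 10 set), hashes = { 0, 1, 1 }. Since
 * hashes[1] and hashes[2] are both 1, we take the modulus path: mod_addr =
 * PA >> 11 = 0 and the channel is hashes[0] | (0 % 3) << 1 = 0. Had PA been
 * 0x200 instead, hashes would be { 0, 1, 0 } and the channel would simply be
 * 0 | (1 << 1) | (0 << 2) = 2.
 */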

/*
 * This is the standard hash function for the non-power of two based NPS
 * hashes. See the big theory statement for more information. Unlike the
 * normal NPS hash which uses bit 14 conditionally based on socket
 * interleaving, here it is always used.
 */
static void
zen_umc_decode_hash_nps_mod(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[3])
{
	const uint32_t addr_bits[3] = { rule->ddr_addr_start, 12, 13 };

	for (uint_t i = 0; i < ZEN_UMC_NPS_MOD_NBITS; i++) {
		hashes[i] = bitx64(pa, addr_bits[i], addr_bits[i]);
		if (i == 0) {
			uint8_t val = bitx64(pa, 14, 14);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(pa, 16 + i, 16 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(pa, 21 + i, 21 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(pa, 30 + i, 30 + i);
			hashes[i] ^= val;
		}
	}
}

static void
zen_umc_decode_hash_nps_k_mod(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[4])
{
	const uint32_t addr_bits[4] = { rule->ddr_addr_start, 9, 12, 13 };

	for (size_t i = 0; i < ARRAY_SIZE(addr_bits); i++) {
		hashes[i] = bitx64(pa, addr_bits[i], addr_bits[i]);
		if (i == 0) {
			uint8_t val = bitx64(pa, 14, 14);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(pa, 16 + i, 16 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(pa, 21 + i, 21 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(pa, 30 + i, 30 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_40_42) != 0) {
			uint8_t val = bitx64(pa, 40 + i, 40 + i);
			hashes[i] ^= val;
		}
	}
}

/*
 * See the big theory statement in zen_umc.c which describes the rules for this
 * computation. This is a little less weird than the Zen 3 one, but still,
 * unique.
 */
static boolean_t
zen_umc_decode_ileave_nps_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t hashes[3] = { 0 };
	uint32_t nsock_bit, chan_mod;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	if (rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	nsock_bit = rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
		chan_mod = 3;
		break;
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		chan_mod = 5;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	zen_umc_decode_hash_nps_mod(rule, dec->dec_ilv_pa, hashes);

	if (nsock_bit > 0) {
		ASSERT3U(nsock_bit, ==, 1);
		dec->dec_ilv_sock = hashes[0];
	}

	dec->dec_ilv_chan = bitx64(dec->dec_ilv_pa, 63, 14) % chan_mod;
	if (hashes[0] == 1) {
		dec->dec_ilv_chan = (dec->dec_ilv_chan + 1) % chan_mod;
	}

	/*
	 * Use the remaining hash bits based on the number of channels. There is
	 * nothing else to do for 3/5 channel configs.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
		break;
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		dec->dec_ilv_chan += hashes[2] * chan_mod;
		break;
	case DF_CHAN_ILEAVE_NPS1_12CH:
		dec->dec_ilv_chan += ((hashes[2] << 1) | hashes[1]) * chan_mod;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	return (B_TRUE);
}
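
/*
 * Worked example with assumed values: an NPS4_3CH rule (chan_mod = 3) with no
 * socket interleaving, ddr_addr_start = 8, and no extra hash flags set, so
 * hashes[0] = PA[8] ^ PA[14]. For PA = 0x12000 (bits 13 and 16 set),
 * PA[63:14] = 4, so the starting channel is 4 % 3 = 1; hashes[0] = 0 ^ 0 = 0,
 * so no rotation applies and the final channel is 1. Whenever hashes[0] is 1,
 * the starting channel is instead rotated by one, modulo 3.
 */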

/*
 * Determine the interleave address for the NPS 1K/2K non-power of 2 based
 * values. Each of these uses a similar style of calculation with rather
 * different values and as such we use a data table for each of these that maps
 * to a given rule.
 */
static boolean_t
zen_umc_decode_ileave_nps_k_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t hashes[4] = { 0 };
	uint32_t chan, mod_val;
	uint64_t mod_addr;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_np2_k_rule_t *np2 = NULL;

	for (size_t i = 0; i < ARRAY_SIZE(zen_umc_np2_k_rules); i++) {
		if (rule->ddr_chan_ileave == zen_umc_np2_k_rules[i].zukr_type) {
			np2 = &zen_umc_np2_k_rules[i];
			break;
		}
	}

	if (np2 == NULL) {
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	if (rule->ddr_die_ileave_bits != 0 || rule->ddr_addr_start != 8) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	/*
	 * These rules either require that socket interleaving is enabled or
	 * not. Make sure that this matches before we proceed.
	 */
	if (np2->zukr_sock != (rule->ddr_sock_ileave_bits == 1)) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	zen_umc_decode_hash_nps_k_mod(rule, dec->dec_ilv_pa, hashes);
	if (rule->ddr_sock_ileave_bits > 0) {
		ASSERT3U(rule->ddr_sock_ileave_bits, ==, 1);
		dec->dec_ilv_sock = hashes[0];
	}

	mod_addr = bitx64(dec->dec_ilv_pa, 63, np2->zukr_high);
	mod_addr = mod_addr << np2->zukr_mod_shift;
	for (uint32_t i = 0; i < np2->zukr_mod_shift; i++) {
		uint32_t bit = np2->zukr_mod_fill[i];
		if (bit != 0) {
			uint64_t val = bitx64(dec->dec_ilv_pa, bit, bit);
			mod_addr = bitset64(mod_addr, i, i, val);
		}
	}

	mod_val = (uint32_t)(mod_addr % np2->zukr_mod);
	chan = mod_val << np2->zukr_chan_mod_shift;
	for (uint32_t i = 0; i < np2->zukr_chan_mod_shift; i++) {
		VERIFY3U(np2->zukr_chan_fill[i], <, ARRAY_SIZE(hashes));
		uint32_t val = hashes[np2->zukr_chan_fill[i]];
		chan = bitset32(chan, i, i, val);
	}

	dec->dec_ilv_chan = chan;
	return (B_TRUE);
}
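
/*
 * Worked example using the NPS4_3CH_1K table entry above (illustrative PA):
 * zukr_high = 12, zukr_mod_shift = 2, and zukr_mod_fill = { 8, 9 }, so
 * mod_addr = (PA[63:12] << 2) | (PA[9] << 1) | PA[8]. For PA = 0x1300,
 * PA[63:12] = 1 and bits 8 and 9 are set, giving mod_addr = 0b111 = 7 and
 * mod_val = 7 % 3 = 1. With zukr_chan_mod_shift = 0 there are no hash bits to
 * fill in, so the channel is simply 1.
 */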

/*
 * Our next task is to take the PA and the DF rule and determine which DRAM
 * channel the system address is targeting. There are several things that we
 * need to take into account here when performing interleaving and translation:
 *
 *  o The DRAM Hole modifying our base address
 *  o The various interleave bits
 *  o Potentially hashing based on channel and global settings
 *  o Potential CS re-targeting registers (only on some systems)
 *  o Finally, the question of how to adjust for the DRAM hole and the base
 *    address changes based on the DF generation and channel configuration.
 *    This influences what address we start interleaving with.
 *
 * Note, this phase does not actually construct the normalized (e.g. channel)
 * address. That's done in a subsequent step. For more background, please see
 * the 'Data Fabric Interleaving' section of the zen_umc.c big theory
 * statement.
 */
static boolean_t
zen_umc_decode_sysaddr_to_csid(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t sock, die, chan, remap_ruleset;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_cs_remap_t *remap;

	/*
	 * First, we must determine what the actual address used for
	 * interleaving is. This varies based on the interleaving and DF
	 * generation.
	 */
	if (!zen_umc_determine_ileave_addr(umc, dec)) {
		return (B_FALSE);
	}

	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
	case DF_CHAN_ILEAVE_2CH:
	case DF_CHAN_ILEAVE_4CH:
	case DF_CHAN_ILEAVE_8CH:
	case DF_CHAN_ILEAVE_16CH:
	case DF_CHAN_ILEAVE_32CH:
		if (!zen_umc_decode_ileave_nohash(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_COD4_2CH:
	case DF_CHAN_ILEAVE_COD2_4CH:
	case DF_CHAN_ILEAVE_COD1_8CH:
		if (!zen_umc_decode_ileave_cod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_2CH:
	case DF_CHAN_ILEAVE_NPS2_4CH:
	case DF_CHAN_ILEAVE_NPS1_8CH:
		if (!zen_umc_decode_ileave_nps(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_6CH:
		if (!zen_umc_decode_ileave_zen3_6ch(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		if (!zen_umc_decode_ileave_nps_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
		if (!zen_umc_decode_ileave_nps_k(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH_1K:
	case DF_CHAN_ILEAVE_NPS2_6CH_1K:
	case DF_CHAN_ILEAVE_NPS1_12CH_1K:
	case DF_CHAN_ILEAVE_NPS0_24CH_1K:
	case DF_CHAN_ILEAVE_NPS2_5CH_1K:
	case DF_CHAN_ILEAVE_NPS1_10CH_1K:
	case DF_CHAN_ILEAVE_NPS4_3CH_2K:
	case DF_CHAN_ILEAVE_NPS2_6CH_2K:
	case DF_CHAN_ILEAVE_NPS1_12CH_2K:
	case DF_CHAN_ILEAVE_NPS0_24CH_2K:
	case DF_CHAN_ILEAVE_NPS2_5CH_2K:
	case DF_CHAN_ILEAVE_NPS1_10CH_2K:
		if (!zen_umc_decode_ileave_nps_k_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_MI3H_8CH:
	case DF_CHAN_ILEAVE_MI3H_16CH:
	case DF_CHAN_ILEAVE_MI3H_32CH:
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * At this point we have dealt with decoding the interleave into the
	 * logical elements that it contains. We need to transform that back
	 * into a fabric ID, so we can add it to the base fabric ID in our rule.
	 * After that, we need to see if there is any CS remapping going on. If
	 * there is, we will replace the component part of the decomposed fabric
	 * ID. With that done, we can then transform the components back into
	 * our target fabric ID, which indicates which UMC we're after.
	 */
	zen_fabric_id_compose(&umc->umc_decomp, dec->dec_ilv_sock,
	    dec->dec_ilv_die, dec->dec_ilv_chan, &dec->dec_ilv_fabid);
	dec->dec_log_fabid = dec->dec_ilv_fabid + rule->ddr_dest_fabid;

	/*
	 * If there's no remapping to do, then we're done. Simply assign the
	 * logical ID as our target.
	 */
	zen_fabric_id_decompose(&umc->umc_decomp, dec->dec_log_fabid, &sock,
	    &die, &chan);
	if ((rule->ddr_flags & DF_DRAM_F_REMAP_EN) == 0) {
		dec->dec_targ_fabid = dec->dec_log_fabid;
		return (B_TRUE);
	}

	/*
	 * The DF contains multiple remapping tables. We must figure out which
	 * of these to actually use. There are two different ways that this can
	 * work. The first way is the one added in DFv4 and is used since then.
	 * In that case, the DRAM rule includes both that remapping was enabled
	 * and which of the multiple mapping tables to use.
	 *
	 * This feature also exists prior to DFv4, but only in Milan. In that
	 * world, indicated by the DF_DRAM_F_REMAP_SOCK flag, there is one table
	 * in each DF per-socket. Based on the destination socket from the data
	 * fabric ID, you pick the actual table to use.
	 *
	 * Once the table has been selected, we maintain the socket and die
	 * portions of the fabric ID as constants and replace the component with
	 * the one the remapping table indicates.
	 *
	 * Technically each DF has its own copy of the remapping tables. To make
	 * this work we rely on the following assumption: a given DF node has to
	 * be able to fully route all DRAM rules to a target. That is, a given
	 * DF node doesn't really forward a system address to the remote die for
	 * further interleave processing and therefore we must have enough
	 * information here to map it totally from the same DF that we got the
	 * CCM rules from in the first place, DF 0.
	 */
	if ((rule->ddr_flags & DF_DRAM_F_REMAP_SOCK) != 0) {
		remap_ruleset = sock;
	} else {
		remap_ruleset = rule->ddr_remap_ent;
	}

	if (remap_ruleset >= dec->dec_df_rulesrc->zud_cs_nremap) {
		dec->dec_fail = ZEN_UMC_DECODE_F_BAD_REMAP_SET;
		dec->dec_fail_data = remap_ruleset;
		return (B_FALSE);
	}

	remap = &dec->dec_df_rulesrc->zud_remap[remap_ruleset];
	if (chan >= remap->csr_nremaps) {
		dec->dec_fail = ZEN_UMC_DECODE_F_BAD_REMAP_ENTRY;
		dec->dec_fail_data = chan;
		return (B_FALSE);
	}

	dec->dec_remap_comp = remap->csr_remaps[chan];
	if ((dec->dec_remap_comp & ~umc->umc_decomp.dfd_comp_mask) != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_REMAP_HAS_BAD_COMP;
		dec->dec_fail_data = dec->dec_remap_comp;
		return (B_FALSE);
	}

	zen_fabric_id_compose(&umc->umc_decomp, sock, die, dec->dec_remap_comp,
	    &dec->dec_targ_fabid);

	return (B_TRUE);
}

/*
 * Our next step here is to actually take our target ID and find the
 * corresponding DF, UMC, and actual rule that was used. Note, we don't
 * decompose the ID and look things up that way for a few reasons. While each
 * UMC should map linearly to its instance/component ID, there are suggestions
 * that they can be renumbered. This makes it simplest to just walk over
 * everything (and there aren't that many things to walk over either).
 */
static boolean_t
zen_umc_decode_find_umc_rule(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	for (uint_t dfno = 0; dfno < umc->umc_ndfs; dfno++) {
		const zen_umc_df_t *df = &umc->umc_dfs[dfno];
		for (uint_t umcno = 0; umcno < df->zud_nchan; umcno++) {
			const zen_umc_chan_t *chan = &df->zud_chan[umcno];

			if (chan->chan_fabid != dec->dec_targ_fabid) {
				continue;
			}

			/*
			 * At this point we have found the UMC that we were
			 * looking for. Snapshot that and then figure out which
			 * rule index of it corresponds to our mapping so we can
			 * properly determine an offset. We will still use the
			 * primary CCM rule for all other calculations.
			 */
			dec->dec_umc_chan = chan;
			for (uint32_t ruleno = 0; ruleno < chan->chan_nrules;
			    ruleno++) {
				const df_dram_rule_t *rule =
				    &chan->chan_rules[ruleno];
				if ((rule->ddr_flags & DF_DRAM_F_VALID) == 0) {
					continue;
				}

				if (dec->dec_pa >= rule->ddr_base &&
				    dec->dec_pa < rule->ddr_limit) {
					dec->dec_umc_ruleno = ruleno;
					return (B_TRUE);
				}
			}

			dec->dec_fail = ZEN_UMC_DECODE_F_UMC_DOESNT_HAVE_PA;
			return (B_FALSE);
		}
	}

	dec->dec_fail = ZEN_UMC_DECODE_F_CANNOT_MAP_FABID;
	return (B_FALSE);
}

/*
 * Non-hashing interleave modes system address normalization logic. See the
 * zen_umc.c big theory statement for more information.
 */
static boolean_t
zen_umc_decode_normalize_nohash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint_t nbits = 0;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	nbits += rule->ddr_sock_ileave_bits;
	nbits += rule->ddr_die_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
		break;
	case DF_CHAN_ILEAVE_2CH:
		nbits += 1;
		break;
	case DF_CHAN_ILEAVE_4CH:
		nbits += 2;
		break;
	case DF_CHAN_ILEAVE_8CH:
		nbits += 3;
		break;
	case DF_CHAN_ILEAVE_16CH:
		nbits += 4;
		break;
	case DF_CHAN_ILEAVE_32CH:
		nbits += 5;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * If we have a really simple configuration (e.g. no interleaving at
	 * all), then make sure that we do not actually do anything here.
	 */
	if (nbits > 0) {
		dec->dec_norm_addr = bitdel64(dec->dec_norm_addr,
		    rule->ddr_addr_start + nbits - 1, rule->ddr_addr_start);
	}

	return (B_TRUE);
}
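
/*
 * Worked example with assumed values: a 2-channel rule with one socket
 * interleave bit (nbits = 2) and ddr_addr_start = 8. bitdel64() removes
 * PA[9:8], so an adjusted address of 0x1234 becomes ((0x1234 >> 10) << 8) |
 * (0x1234 & 0xff) = 0x434: everything above the interleave bits slides down
 * by two positions and the low byte is untouched.
 */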

/*
 * COD/NPS system address normalization logic. See the zen_umc.c big theory
 * statement for more information.
 */
static boolean_t
zen_umc_decode_normalize_hash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint_t nbits = 0, nstart = 0;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	/*
	 * NPS 1K hashes remove bits 8 and 9 first. Determine how many bits to
	 * remove from the starting location. This will later be reduced based
	 * upon how many address bits there actually are.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
		nstart = 2;
		break;
	default:
		nstart = 1;
		break;
	}

	/*
	 * NPS hashes allow for socket interleaving, COD hashes do not. Add
	 * socket interleaving, skip die.
	 */
	nbits += rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_COD4_2CH:
	case DF_CHAN_ILEAVE_NPS4_2CH:
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
		nbits += 1;
		break;
	case DF_CHAN_ILEAVE_COD2_4CH:
	case DF_CHAN_ILEAVE_NPS2_4CH:
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
		nbits += 2;
		break;
	case DF_CHAN_ILEAVE_COD1_8CH:
	case DF_CHAN_ILEAVE_NPS1_8CH:
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
		nbits += 3;
		break;
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
		nbits += 4;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * Don't remove more bits from the start than exist.
	 */
	if (nstart > nbits) {
		nstart = nbits;
	}

	/*
	 * Always remove high order bits before low order bits so we don't have
	 * to adjust the bits we need to remove.
	 */
	if (nbits > nstart) {
		uint_t start = 12;
		uint_t end = start + (nbits - nstart - 1);
		dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, end, start);
	}

	dec->dec_norm_addr = bitdel64(dec->dec_norm_addr,
	    rule->ddr_addr_start + nstart - 1, rule->ddr_addr_start);
	return (B_TRUE);
}
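
/*
 * Worked example with assumed values: an NPS1_8CH_1K rule with one socket
 * interleave bit and ddr_addr_start = 8. nstart is 2 and nbits is 4, so we
 * first delete PA[13:12] (the high order hash bits) and then PA[9:8],
 * removing the four interleaved bits 8, 9, 12, and 13 in total while leaving
 * bits 10 and 11 in place.
 */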

/*
 * Now it's time to perform normalization of our favorite interleaving type.
 * Please see the comments in zen_umc.c on this to understand what we're doing
 * here and why.
 */
static boolean_t
zen_umc_decode_normalize_zen3_6ch(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t hashes[3] = { 0 };
	uint_t start, end;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	/*
	 * As per the theory statement, we always remove the hash bits here from
	 * the starting address. Because this is a 6-channel config, that turns
	 * into 3. Perform the hash again first.
	 */
	zen_umc_decode_hash_zen3_6ch(rule, dec->dec_norm_addr, hashes);
	start = rule->ddr_addr_start;
	end = rule->ddr_addr_start + ZEN_UMC_COD_NBITS - 1;
	dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, end, start);

	/*
	 * This is the case the theory statement warned about. This gets
	 * normalized to the top of the DIMM's range (its two upper most bits
	 * are set).
	 */
	if (hashes[1] == 1 && hashes[2] == 1) {
		uint_t start = 14 - ZEN_UMC_COD_NBITS +
		    dec->dec_umc_chan->chan_np2_space0;
		dec->dec_norm_addr = bitset64(dec->dec_norm_addr, start + 1,
		    start, 0x3);
	}

	return (B_TRUE);
}

/*
 * Based on the algorithm of sorts described in zen_umc.c, we have a few
 * different phases of extraction and combination. This isn't quite like the
 * others where we simply delete bits.
 */
static boolean_t
zen_umc_decode_normalize_nps_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint64_t low, high, mid;
	uint_t nbits, chan_mod, sock_bits, nmid_bits;
	uint_t mid_start, mid_end;
	uint8_t hashes[3] = { 0 };
	const df_dram_rule_t *rule = dec->dec_df_rule;

	sock_bits = rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_3CH:
		chan_mod = 3;
		nbits = 1;
		break;
	case DF_CHAN_ILEAVE_NPS2_5CH:
		chan_mod = 5;
		nbits = 1;
		break;
	case DF_CHAN_ILEAVE_NPS2_6CH:
		chan_mod = 3;
		nbits = 2;
		break;
	case DF_CHAN_ILEAVE_NPS1_10CH:
		chan_mod = 5;
		nbits = 2;
		break;
	case DF_CHAN_ILEAVE_NPS1_12CH:
		chan_mod = 3;
		nbits = 3;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * First extract the low bit range that we're using which is everything
	 * below the starting interleave address. We also always extract the
	 * high bits, which are always [63:14] and divide it by the modulus.
	 * Note, we apply the hash after any such division if needed. It becomes
	 * the new least significant bit.
	 */
	low = bitx64(dec->dec_norm_addr, rule->ddr_addr_start - 1, 0);
	high = bitx64(dec->dec_norm_addr, 63, 14) / chan_mod;
	zen_umc_decode_hash_nps_mod(rule, dec->dec_norm_addr, hashes);
	if (sock_bits == 0) {
		high = (high << 1) | hashes[0];
	}

	/*
	 * Now for the weirdest bit here, extracting the middle bits. Recall
	 * this hash uses bit 8, then 13, then 12 (the hash order is still 8,
	 * 12, 13, but it uses the hashes[2] before hashes[1] in
	 * zen_umc_decode_ileave_nps_mod()). So if we're only using 1 interleave
	 * bit, we just remove bit 8 (assuming that is our starting address) and
	 * our range is [13:9]. If we're using two, our range becomes [12:9],
	 * and if three, [11:9]. The 6 - nbits below comes from the fact that in
	 * a 1 bit interleave we have 5 bits. Because our mid_start/mid_end
	 * range is inclusive, we subtract one at the end from mid_end.
	 */
	nmid_bits = 6 - nbits;
	mid_start = rule->ddr_addr_start + 1;
	mid_end = mid_start + nmid_bits - 1;
	mid = bitx64(dec->dec_norm_addr, mid_end, mid_start);

	/*
	 * Because we've been removing bits, we don't use any of the start and
	 * ending ranges we calculated above for shifts, as that was what we
	 * needed from the original address.
	 */
	dec->dec_norm_addr = low | (mid << rule->ddr_addr_start) | (high <<
	    (rule->ddr_addr_start + nmid_bits));

	return (B_TRUE);
}
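
/*
 * Worked example with assumed values: an NPS4_3CH rule (chan_mod = 3, nbits =
 * 1) with no socket interleaving and ddr_addr_start = 8. For an adjusted
 * address of 0xc000: low = PA[7:0] = 0 and mid = PA[13:9] = 0. high starts as
 * PA[63:14] / 3 = 3 / 3 = 1, and hashes[0] = PA[8] ^ PA[14] = 1 is appended
 * as the new least significant bit, so high = 0b11. The normalized address is
 * therefore 0b11 << 13 = 0x6000.
 */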

/*
 * Construct the normalized address for the NPS 1K/2K non-power of 2 instances.
 * See the theory statement for the rough formula used here. While each variant
 * uses slightly different values, that has been abstracted based on our data
 * table.
 */
static boolean_t
zen_umc_decode_normalize_nps_k_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint64_t high, mid, low;
	uint_t mid_end;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_np2_k_rule_t *np2 = NULL;

	for (size_t i = 0; i < ARRAY_SIZE(zen_umc_np2_k_rules); i++) {
		if (rule->ddr_chan_ileave == zen_umc_np2_k_rules[i].zukr_type) {
			np2 = &zen_umc_np2_k_rules[i];
			break;
		}
	}

	if (np2 == NULL) {
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	low = bitx64(dec->dec_norm_addr, rule->ddr_addr_start - 1, 0);
	mid_end = np2->zukr_norm_addr + np2->zukr_norm_naddr - 1;
	VERIFY3U(mid_end, >=, rule->ddr_addr_start);
	mid = bitx64(dec->dec_norm_addr, mid_end, np2->zukr_norm_addr);

	high = bitx64(dec->dec_norm_addr, 63, np2->zukr_high);
	if (np2->zukr_div_naddr > 0) {
		uint_t ins_end = np2->zukr_div_addr + np2->zukr_div_naddr - 1;
		uint64_t insert = bitx64(dec->dec_norm_addr, ins_end,
		    np2->zukr_div_addr);

		high = high << np2->zukr_div_naddr;
		high = bitset64(high, np2->zukr_div_naddr - 1, 0, insert);
	}
	high = high / np2->zukr_mod;

	dec->dec_norm_addr = low | (mid << rule->ddr_addr_start) | (high <<
	    (rule->ddr_addr_start + np2->zukr_norm_naddr));
	return (B_TRUE);
}
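
/*
 * Worked example using the NPS4_3CH_1K table entry (illustrative address):
 * zukr_high = 12, zukr_div_addr = 8, zukr_div_naddr = 2, zukr_norm_addr = 10,
 * and zukr_norm_naddr = 2. For an adjusted address of 0x1300: low = PA[7:0] =
 * 0 and mid = PA[11:10] = 0. high starts as PA[63:12] = 1, has PA[9:8] = 0b11
 * appended (giving 0b111 = 7), and is then divided by the modulus: 7 / 3 = 2.
 * The normalized address is 0 | (0 << 8) | (2 << 10) = 0x800.
 */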
1601
1602 /*
1603 * Now we need to go through and try to construct a normalized address using all
1604 * the information that we've gathered to date. To do this we need to take into
1605 * account all of the following transformations on the address that need to
1606 * occur. We apply modifications to the address in the following order:
1607 *
1608 * o The base address of the rule
1609 * o DRAM hole changes
1610 * o Normalization of the address due to interleaving (more fun)
1611 * o The DRAM offset register of the rule
1612 */
1613 static boolean_t
zen_umc_decode_sysaddr_to_norm(const zen_umc_t * umc,zen_umc_decoder_t * dec)1614 zen_umc_decode_sysaddr_to_norm(const zen_umc_t *umc, zen_umc_decoder_t *dec)
1615 {
1616 const zen_umc_chan_t *chan = dec->dec_umc_chan;
1617 const df_dram_rule_t *rule = dec->dec_df_rule;
1618
1619 dec->dec_norm_addr = dec->dec_pa;
1620 if (!zen_umc_adjust_dram_addr(umc, dec, &dec->dec_norm_addr,
1621 ZEN_UMC_DECODE_F_CALC_NORM_UNDERFLOW)) {
1622 return (B_FALSE);
1623 }
1624
1625 /*
1626 * Now for the most annoying part of this whole thing, normalizing based
1627 * on our actual interleave format. The reason for this is that when
1628 * interleaving is going on, it actually is removing bits that are just
1629 * being used to direct it somewhere; however, it's actually generally
1630 * speaking the same value in each location. See the big theory
1631 * statement in zen_umc.c for more information.
1632 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
	case DF_CHAN_ILEAVE_2CH:
	case DF_CHAN_ILEAVE_4CH:
	case DF_CHAN_ILEAVE_8CH:
	case DF_CHAN_ILEAVE_16CH:
	case DF_CHAN_ILEAVE_32CH:
		if (!zen_umc_decode_normalize_nohash(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_COD4_2CH:
	case DF_CHAN_ILEAVE_COD2_4CH:
	case DF_CHAN_ILEAVE_COD1_8CH:
	case DF_CHAN_ILEAVE_NPS4_2CH:
	case DF_CHAN_ILEAVE_NPS2_4CH:
	case DF_CHAN_ILEAVE_NPS1_8CH:
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
		if (!zen_umc_decode_normalize_hash(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_6CH:
		if (!zen_umc_decode_normalize_zen3_6ch(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		if (!zen_umc_decode_normalize_nps_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH_1K:
	case DF_CHAN_ILEAVE_NPS2_6CH_1K:
	case DF_CHAN_ILEAVE_NPS1_12CH_1K:
	case DF_CHAN_ILEAVE_NPS0_24CH_1K:
	case DF_CHAN_ILEAVE_NPS2_5CH_1K:
	case DF_CHAN_ILEAVE_NPS1_10CH_1K:
	case DF_CHAN_ILEAVE_NPS4_3CH_2K:
	case DF_CHAN_ILEAVE_NPS2_6CH_2K:
	case DF_CHAN_ILEAVE_NPS1_12CH_2K:
	case DF_CHAN_ILEAVE_NPS0_24CH_2K:
	case DF_CHAN_ILEAVE_NPS2_5CH_2K:
	case DF_CHAN_ILEAVE_NPS1_10CH_2K:
		if (!zen_umc_decode_normalize_nps_k_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_MI3H_8CH:
	case DF_CHAN_ILEAVE_MI3H_16CH:
	case DF_CHAN_ILEAVE_MI3H_32CH:
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * Determine if this rule has an offset to apply. Note, there is never
	 * an offset for rule 0, hence the index into this array is one less
	 * than the actual rule number. Unlike the other transformations, these
	 * offsets describe the start of a normalized range, so we must add
	 * this value rather than subtract it.
	 */
	if (dec->dec_umc_ruleno > 0) {
		uint32_t offno = dec->dec_umc_ruleno - 1;
		const chan_offset_t *offset = &chan->chan_offsets[offno];

		if (offset->cho_valid) {
			dec->dec_norm_addr += offset->cho_offset;
		}
	}

	return (B_TRUE);
}
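
/*
 * The following is a minimal, illustrative sketch of just the rule-offset
 * step above, pulled out on its own. The ZEN_UMC_EXAMPLES guard is
 * hypothetical and never defined; this is documentation, not driver code,
 * and it assumes ruleno indexes a configured rule.
 */
#ifdef ZEN_UMC_EXAMPLES
static uint64_t
zen_umc_example_apply_offset(const zen_umc_chan_t *chan, uint32_t ruleno,
    uint64_t norm)
{
	/* Rule 0 never has an offset, hence the ruleno - 1 indexing. */
	if (ruleno > 0 && chan->chan_offsets[ruleno - 1].cho_valid) {
		norm += chan->chan_offsets[ruleno - 1].cho_offset;
	}

	return (norm);
}
#endif	/* ZEN_UMC_EXAMPLES */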

/*
 * This applies the formula that determines whether a chip-select matches,
 * which the PPR defines as (address & ~mask) == (base & ~mask). There are
 * both a primary and a secondary base/mask pair here; we need to record
 * which one (if any) matched for use later on.
 */
static boolean_t
zen_umc_decoder_cs_matches(const umc_cs_t *cs, const uint64_t norm,
    boolean_t *matched_sec)
{
	if (cs->ucs_base.udb_valid != 0) {
		uint64_t imask = ~cs->ucs_base_mask;
		if ((norm & imask) == (cs->ucs_base.udb_base & imask)) {
			*matched_sec = B_FALSE;
			return (B_TRUE);
		}
	}

	if (cs->ucs_sec.udb_valid != 0) {
		uint64_t imask = ~cs->ucs_sec_mask;
		if ((norm & imask) == (cs->ucs_sec.udb_base & imask)) {
			*matched_sec = B_TRUE;
			return (B_TRUE);
		}
	}

	return (B_FALSE);
}
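
/*
 * A worked sketch of the match formula above with invented values: a mask
 * of 0x3ffff covers bits 17:0, so only the bits above bit 17 participate
 * in the comparison. The ZEN_UMC_EXAMPLES guard is hypothetical.
 */
#ifdef ZEN_UMC_EXAMPLES
static void
zen_umc_example_cs_match(void)
{
	const uint64_t base = 0x40000;	/* bit 18 set */
	const uint64_t mask = 0x3ffff;	/* bits 17:0 are don't-care */

	/* 0x5abcd: the bits above 17 are 0x40000, so this matches. */
	ASSERT((0x5abcdULL & ~mask) == (base & ~mask));

	/* 0x8abcd: the bits above 17 are 0x80000, so this does not. */
	ASSERT((0x8abcdULL & ~mask) != (base & ~mask));
}
#endif	/* ZEN_UMC_EXAMPLES */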

/*
 * Go through with our normalized address and map it to a given chip-select.
 * As a side effect, this also indicates which DIMM we're going out on. Note,
 * the final DIMM can change due to chip-select hashing; however, we use this
 * DIMM for determining all of the actual address translations.
 */
static boolean_t
zen_umc_decode_find_cs(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const zen_umc_chan_t *chan = dec->dec_umc_chan;

	for (uint_t dimmno = 0; dimmno < ZEN_UMC_MAX_DIMMS; dimmno++) {
		const umc_dimm_t *dimm = &chan->chan_dimms[dimmno];

		if ((dimm->ud_flags & UMC_DIMM_F_VALID) == 0)
			continue;

		for (uint_t csno = 0; csno < ZEN_UMC_MAX_CS_PER_DIMM; csno++) {
			const umc_cs_t *cs = &dimm->ud_cs[csno];
			boolean_t is_sec = B_FALSE;

			if (zen_umc_decoder_cs_matches(cs, dec->dec_norm_addr,
			    &is_sec)) {
				dec->dec_dimm = dimm;
				dec->dec_cs = cs;
				dec->dec_log_csno = dimmno * ZEN_UMC_MAX_DIMMS +
				    csno;
				dec->dec_cs_sec = is_sec;
				return (B_TRUE);
			}
		}
	}

	dec->dec_fail = ZEN_UMC_DECODE_F_NO_CS_BASE_MATCH;
	return (B_FALSE);
}

/*
 * Extract the column from the address. For once, something that is almost
 * straightforward: each column bit's location in the normalized address is
 * spelled out explicitly.
 */
static boolean_t
zen_umc_decode_cols(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t cols = 0;
	const umc_cs_t *cs = dec->dec_cs;

	for (uint_t i = 0; i < cs->ucs_ncol; i++) {
		uint32_t index;

		index = cs->ucs_col_bits[i];
		cols |= bitx64(dec->dec_norm_addr, index, index) << i;
	}

	dec->dec_dimm_col = cols;
	return (B_TRUE);
}
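
/*
 * A hedged sketch of the column gather above with made-up bit positions: if
 * the column bits lived at normalized address bits 3, 4, and 5, then an
 * address of 0x28 (binary 101000) yields column 0b101, i.e. 5. The
 * ZEN_UMC_EXAMPLES guard is hypothetical.
 */
#ifdef ZEN_UMC_EXAMPLES
static uint32_t
zen_umc_example_cols(uint64_t norm)
{
	const uint_t col_bits[3] = { 3, 4, 5 };	/* invented positions */
	uint32_t cols = 0;

	for (uint_t i = 0; i < 3; i++) {
		cols |= bitx64(norm, col_bits[i], col_bits[i]) << i;
	}

	return (cols);
}
#endif	/* ZEN_UMC_EXAMPLES */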

/*
 * The row is split into two different regions. There's a low and a high
 * value, though the high value is only present in DDR4. Unlike the column,
 * where each bit is spelled out, each set of row bits is contiguous (low and
 * high are independent).
 */
static boolean_t
zen_umc_decode_rows(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t row = 0;
	uint8_t inv;
	const umc_cs_t *cs = dec->dec_cs;
	const uint_t total_bits = cs->ucs_nrow_lo + cs->ucs_nrow_hi;
	const uint_t lo_end = cs->ucs_nrow_lo + cs->ucs_row_low_bit - 1;

	row = bitx64(dec->dec_norm_addr, lo_end, cs->ucs_row_low_bit);
	if (cs->ucs_nrow_hi > 0) {
		const uint_t hi_end = cs->ucs_nrow_hi + cs->ucs_row_hi_bit - 1;
		const uint32_t hi = bitx64(dec->dec_norm_addr, hi_end,
		    cs->ucs_row_hi_bit);

		row |= hi << cs->ucs_nrow_lo;
	}

	if (dec->dec_cs_sec) {
		inv = cs->ucs_inv_msbs_sec;
	} else {
		inv = cs->ucs_inv_msbs;
	}

	/*
	 * We need to potentially invert the top two bits of the row address
	 * based on the low two bits of the inversion register. Note, inv only
	 * has two valid bits, so we shift them into place to perform the XOR.
	 * See the big theory statement in zen_umc.c for more on why this
	 * works.
	 */
	inv = inv << (total_bits - 2);
	row = row ^ inv;

	dec->dec_dimm_row = row;
	return (B_TRUE);
}
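
/*
 * A worked sketch of the row assembly above with invented geometry: 10 low
 * row bits starting at normalized address bit 12, 2 high row bits starting
 * at bit 24, and both inversion bits set. The ZEN_UMC_EXAMPLES guard is
 * hypothetical.
 */
#ifdef ZEN_UMC_EXAMPLES
static uint32_t
zen_umc_example_row(uint64_t norm)
{
	const uint_t nrow_lo = 10, row_low_bit = 12;
	const uint_t nrow_hi = 2, row_hi_bit = 24;
	const uint8_t inv = 0x3;
	uint32_t row;

	/* row = norm[25:24] << 10 | norm[21:12] */
	row = bitx64(norm, row_low_bit + nrow_lo - 1, row_low_bit);
	row |= bitx64(norm, row_hi_bit + nrow_hi - 1, row_hi_bit) << nrow_lo;

	/* Flip the top two of the 12 row bits: row ^= 0x3 << 10. */
	return (row ^ (inv << (nrow_lo + nrow_hi - 2)));
}
#endif	/* ZEN_UMC_EXAMPLES */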

/*
 * Several of the hash schemes ask us to XOR together all of the bits in an
 * address, reducing them to a single bit; in other words, a bitwise XOR
 * reduce, i.e. the parity of the value. This implements that for a uint32_t;
 * a 64-bit variant follows.
 */
static uint8_t
zen_umc_running_xor32(const uint32_t in)
{
	uint8_t run = 0;

	for (uint_t i = 0; i < sizeof (in) * NBBY; i++) {
		run ^= bitx32(in, i, i);
	}

	return (run);
}

static uint8_t
zen_umc_running_xor64(const uint64_t in)
{
	uint8_t run = 0;

	for (uint_t i = 0; i < sizeof (in) * NBBY; i++) {
		run ^= bitx64(in, i, i);
	}

	return (run);
}
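
/*
 * For reference, the same reduction can be written as the classic parity
 * fold, which makes it clearer that the loops above simply compute the
 * parity of the set bits. This sketch, under the hypothetical
 * ZEN_UMC_EXAMPLES guard, is equivalent to zen_umc_running_xor64().
 */
#ifdef ZEN_UMC_EXAMPLES
static uint8_t
zen_umc_example_parity64(uint64_t in)
{
	/* Each fold XORs the top half of the value into the bottom half. */
	in ^= in >> 32;
	in ^= in >> 16;
	in ^= in >> 8;
	in ^= in >> 4;
	in ^= in >> 2;
	in ^= in >> 1;

	return (in & 1);
}
#endif	/* ZEN_UMC_EXAMPLES */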

/*
 * Our goal here is to extract the number of banks and bank groups that are
 * used, if any.
 */
static boolean_t
zen_umc_decode_banks(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t bank = 0;
	const umc_cs_t *cs = dec->dec_cs;
	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;

	/*
	 * Get an initial bank address bit and then perform any hashing if
	 * bank hashing is enabled. Note, the memory controller's nbanks is
	 * the total number of bank and bank group bits, hence its use as the
	 * loop bound.
	 */
	for (uint_t i = 0; i < cs->ucs_nbanks; i++) {
		uint32_t row_hash, col_hash;
		uint8_t row_xor, col_xor;
		uint_t targ = cs->ucs_bank_bits[i];
		uint8_t val = bitx64(dec->dec_norm_addr, targ, targ);
		const umc_bank_hash_t *bank_hash = &hash->uch_bank_hashes[i];

		if ((hash->uch_flags & UMC_CHAN_HASH_F_BANK) == 0 ||
		    !bank_hash->ubh_en) {
			bank |= val << i;
			continue;
		}

		/*
		 * See the big theory statement for more on this. Short form,
		 * bitwise AND the row and column with the hash masks, then
		 * XOR-reduce and fold the results into the address bit.
		 */
		row_hash = dec->dec_dimm_row & bank_hash->ubh_row_xor;
		col_hash = dec->dec_dimm_col & bank_hash->ubh_col_xor;
		row_xor = zen_umc_running_xor32(row_hash);
		col_xor = zen_umc_running_xor32(col_hash);
		bank |= (row_xor ^ col_xor ^ val) << i;
	}

	/*
	 * The bank and bank group are conjoined in the register and bit
	 * definitions. Once we've calculated that, extract each part.
	 */
	dec->dec_dimm_bank_group = bitx8(bank, cs->ucs_nbank_groups - 1, 0);
	dec->dec_dimm_bank = bitx8(bank, cs->ucs_nbanks, cs->ucs_nbank_groups);
	return (B_TRUE);
}
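
/*
 * A hedged sketch of a single hashed bank bit with invented masks and bit
 * position: the bank bit is the XOR of the raw address bit with the
 * parities of the masked row and column, just as in the loop above. Under
 * the hypothetical ZEN_UMC_EXAMPLES guard.
 */
#ifdef ZEN_UMC_EXAMPLES
static uint8_t
zen_umc_example_bank_bit(uint64_t norm, uint32_t row, uint32_t col)
{
	const uint32_t row_xor = 0x1e0;	/* invented row hash mask */
	const uint32_t col_xor = 0x03c;	/* invented column hash mask */
	const uint_t targ = 15;		/* invented bank address bit */
	uint8_t val = bitx64(norm, targ, targ);

	return (val ^ zen_umc_running_xor32(row & row_xor) ^
	    zen_umc_running_xor32(col & col_xor));
}
#endif	/* ZEN_UMC_EXAMPLES */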

/*
 * Extract the sub-channel. If this is not a DDR5-based device, simply set
 * it to zero and return. Otherwise, we must not forget to hash it if
 * required.
 */
static boolean_t
zen_umc_decode_subchan(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t subchan;
	uint32_t row_hash, col_hash, bank_hash;
	uint8_t row_xor, col_xor, bank_xor;
	const umc_cs_t *cs = dec->dec_cs;
	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;

	switch (dec->dec_umc_chan->chan_type) {
	case UMC_DIMM_T_DDR5:
	case UMC_DIMM_T_LPDDR5:
		break;
	default:
		dec->dec_dimm_subchan = 0;
		return (B_TRUE);
	}

	subchan = bitx64(dec->dec_norm_addr, cs->ucs_subchan, cs->ucs_subchan);
	if ((hash->uch_flags & UMC_CHAN_HASH_F_PC) == 0 ||
	    !hash->uch_pc_hash.uph_en) {
		dec->dec_dimm_subchan = subchan;
		return (B_TRUE);
	}

	row_hash = dec->dec_dimm_row & hash->uch_pc_hash.uph_row_xor;
	col_hash = dec->dec_dimm_col & hash->uch_pc_hash.uph_col_xor;
	bank_hash = dec->dec_dimm_bank & hash->uch_pc_hash.uph_bank_xor;
	row_xor = zen_umc_running_xor32(row_hash);
	col_xor = zen_umc_running_xor32(col_hash);
	bank_xor = zen_umc_running_xor32(bank_hash);

	dec->dec_dimm_subchan = subchan ^ row_xor ^ col_xor ^ bank_xor;
	return (B_TRUE);
}

/*
 * Note that we have normalized the RM bits between the primary and secondary
 * base/mask registers so that even though the DDR5 controller always uses
 * the same RM selection bits, this works in a uniform way for both DDR4 and
 * DDR5.
 */
static boolean_t
zen_umc_decode_rank_mul(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t rm = 0;
	const umc_cs_t *cs = dec->dec_cs;
	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;

	for (uint_t i = 0; i < cs->ucs_nrm; i++) {
		uint8_t index = cs->ucs_rm_bits[i];
		uint8_t bit = bitx64(dec->dec_norm_addr, index, index);

		if ((hash->uch_flags & UMC_CHAN_HASH_F_RM) != 0 &&
		    hash->uch_rm_hashes[i].uah_en) {
			uint64_t norm_mask = dec->dec_norm_addr &
			    hash->uch_rm_hashes[i].uah_addr_xor;
			uint8_t norm_hash = zen_umc_running_xor64(norm_mask);
			bit = bit ^ norm_hash;
		}

		rm |= bit << i;
	}

	dec->dec_dimm_rm = rm;
	return (B_TRUE);
}
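
/*
 * A hedged sketch of a single hashed rank-multiplication bit with an
 * invented bit index and address mask: the selected address bit is XORed
 * with the parity of the normalized address under the mask, as in the loop
 * above. Under the hypothetical ZEN_UMC_EXAMPLES guard.
 */
#ifdef ZEN_UMC_EXAMPLES
static uint8_t
zen_umc_example_rm_bit(uint64_t norm)
{
	const uint_t index = 13;			/* invented RM bit */
	const uint64_t addr_xor = 0x1ffff0000ULL;	/* invented mask */
	uint8_t bit = bitx64(norm, index, index);

	return (bit ^ zen_umc_running_xor64(norm & addr_xor));
}
#endif	/* ZEN_UMC_EXAMPLES */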

/*
 * Go through and determine the actual chip-select that is activated. This is
 * subject to hashing. Note, we first constructed a logical chip-select value
 * based on which of the four base/mask registers in the UMC we activated for
 * the channel. That seeded the two-bit value we start with here.
 */
static boolean_t
zen_umc_decode_chipsel(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t csno = 0;
	const umc_cs_t *cs = dec->dec_cs;
	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;

	for (uint_t i = 0; i < ZEN_UMC_MAX_CS_BITS; i++) {
		uint8_t bit = bitx8(dec->dec_log_csno, i, i);
		if ((hash->uch_flags & UMC_CHAN_HASH_F_CS) != 0 &&
		    hash->uch_cs_hashes[i].uah_en) {
			uint64_t mask = dec->dec_norm_addr &
			    hash->uch_cs_hashes[i].uah_addr_xor;
			uint8_t rxor = zen_umc_running_xor64(mask);
			bit = bit ^ rxor;
		}
		csno |= bit << i;
	}

	/*
	 * It is not entirely clear under what circumstances we need to apply
	 * the chip-select XOR, so right now we always apply it. It only
	 * exists on a few DDR5 SoCs, it seems, and we zero it out in other
	 * cases to try and have a uniform and reasonable path. This tells us
	 * the absolute chip-select within the channel. We record this for
	 * debugging purposes and to derive the DIMM and CS.
	 */
	dec->dec_chan_csno = (csno ^ cs->ucs_cs_xor) & 0x3;

	/*
	 * Now that we know which chip-select we're targeting, go back and
	 * determine which DIMM we'll go out to and what chip-select it is
	 * relative to that DIMM. This may have changed due to CS hashing, so
	 * we must snapshot our final DIMM and chip-select here.
	 */
	dec->dec_dimm_no = dec->dec_chan_csno >> 1;
	dec->dec_dimm_csno = dec->dec_chan_csno % 2;
	return (B_TRUE);
}
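
/*
 * A worked sketch of the final derivation above with invented values: a
 * logical chip-select of 2 and a cs_xor of 1 yield an absolute chip-select
 * of (2 ^ 1) & 0x3 == 3, which is DIMM 1 (3 >> 1) and chip-select 1 (3 % 2)
 * on that DIMM. Under the hypothetical ZEN_UMC_EXAMPLES guard.
 */
#ifdef ZEN_UMC_EXAMPLES
static void
zen_umc_example_chipsel(void)
{
	const uint8_t log_csno = 2, cs_xor = 1;
	const uint8_t chan_csno = (log_csno ^ cs_xor) & 0x3;

	ASSERT3U(chan_csno, ==, 3);
	ASSERT3U(chan_csno >> 1, ==, 1);	/* DIMM 1 */
	ASSERT3U(chan_csno % 2, ==, 1);		/* CS 1 */
}
#endif	/* ZEN_UMC_EXAMPLES */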

/*
 * Initialize the decoder state. We do this by first zeroing it all and then
 * setting the various result fields to the appropriate UINTXX_MAX. These
 * work as better sentinel values than zero; however, we always zero the
 * structure to be defensive, cover pointers, etc.
 */
static void
zen_umc_decoder_init(zen_umc_decoder_t *dec)
{
	bzero(dec, sizeof (*dec));

	dec->dec_pa = dec->dec_ilv_pa = UINT64_MAX;
	dec->dec_df_ruleno = UINT32_MAX;
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan =
	    dec->dec_ilv_fabid = dec->dec_log_fabid = dec->dec_remap_comp =
	    dec->dec_targ_fabid = UINT32_MAX;
	dec->dec_umc_ruleno = UINT32_MAX;
	dec->dec_norm_addr = UINT64_MAX;
	dec->dec_dimm_col = dec->dec_dimm_row = UINT32_MAX;
	dec->dec_log_csno = dec->dec_dimm_bank = dec->dec_dimm_bank_group =
	    dec->dec_dimm_subchan = dec->dec_dimm_rm = dec->dec_chan_csno =
	    dec->dec_dimm_no = dec->dec_dimm_csno = UINT8_MAX;
}

boolean_t
zen_umc_decode_pa(const zen_umc_t *umc, const uint64_t pa,
    zen_umc_decoder_t *dec)
{
	zen_umc_decoder_init(dec);
	dec->dec_pa = pa;

	/*
	 * Before we proceed through decoding, the first thing we should do is
	 * verify that this is even something that could be DRAM.
	 */
	if (!zen_umc_decode_is_dram(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * Next, we need to find a data fabric rule that corresponds to this
	 * memory address. This will be used to determine which set of rules
	 * for interleaving and related we actually should then use.
	 */
	if (!zen_umc_decode_find_df_rule(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * Now that we have a DF rule, we must take the more involved step of
	 * mapping to a given CS, e.g. a specific UMC channel. This will tell
	 * us the socket and die as well. It takes care of all the
	 * interleaving and remapping and produces a target fabric ID.
	 */
	if (!zen_umc_decode_sysaddr_to_csid(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * With that target ID known, now actually map this to a
	 * corresponding UMC.
	 */
	if (!zen_umc_decode_find_umc_rule(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * With the target and its corresponding rules and offset information
	 * in hand, actually perform normalization.
	 */
	if (!zen_umc_decode_sysaddr_to_norm(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * We have now managed to construct a normalized address, so we must
	 * begin the act of transforming this channel address into something
	 * that makes sense for addressing a DIMM. To start with, determine
	 * the logical chip-select, which determines where we source all the
	 * decoding data we use.
	 */
	if (!zen_umc_decode_find_cs(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * Now that we've matched the logical chip-select that we're sourcing
	 * our data from, the next part is a bit more involved: we need to
	 * extract the row, column, rank/rank multiplication, bank, and bank
	 * group out of all this, while taking into account all of our hashes.
	 *
	 * To do this, we begin by first calculating the row and column as
	 * those will be needed to determine some of our other values here.
	 */
	if (!zen_umc_decode_rows(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	if (!zen_umc_decode_cols(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * Now that we have the rows and columns, we can go through and
	 * determine the bank and bank group, which depend on the above.
	 */
	if (!zen_umc_decode_banks(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * If we have a DDR5 generation DIMM then we need to consider the
	 * sub-channel. This doesn't exist in DDR4 systems (the function
	 * handles this reality). Because of potential hashing, this needs to
	 * come after the row, column, and bank have all been determined.
	 */
	if (!zen_umc_decode_subchan(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * Time for the last two pieces here: figuring out which rank, taking
	 * into account rank multiplication, and then the actual chip-select
	 * used. Don't worry, these both have hashing opportunities.
	 */
	if (!zen_umc_decode_rank_mul(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	if (!zen_umc_decode_chipsel(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * Somehow, that's it.
	 */
	return (B_TRUE);
}
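
/*
 * Finally, a hedged usage sketch of the decoder entry point, under the
 * hypothetical ZEN_UMC_EXAMPLES guard. The umc snapshot would come from the
 * driver or the userland consumer that loaded the fabric and UMC state.
 */
#ifdef ZEN_UMC_EXAMPLES
static boolean_t
zen_umc_example_decode(const zen_umc_t *umc, uint64_t pa)
{
	zen_umc_decoder_t dec;

	if (!zen_umc_decode_pa(umc, pa, &dec)) {
		/*
		 * dec.dec_fail and dec.dec_fail_data describe why decoding
		 * stopped.
		 */
		return (B_FALSE);
	}

	/*
	 * On success, the DIMM coordinates are all valid: dec.dec_dimm_no,
	 * dec.dec_dimm_csno, dec.dec_dimm_row, dec.dec_dimm_col,
	 * dec.dec_dimm_bank, dec.dec_dimm_bank_group, dec.dec_dimm_subchan,
	 * and dec.dec_dimm_rm.
	 */
	return (B_TRUE);
}
#endif	/* ZEN_UMC_EXAMPLES */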