1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2024 Oxide Computer Company 14 */ 15 16 /* 17 * Zen UMC Decoding logic. See zen_umc.c for an overview of everything. This 18 * implements shared userland/kernel decoding. 19 */ 20 21 #include "zen_umc.h" 22 23 #ifndef _KERNEL 24 #include <strings.h> 25 #endif 26 27 /* 28 * Address constants. 29 */ 30 #define ZEN_UMC_TOM2_START 0x100000000ULL 31 #define ZEN_UMC_TOM2_RSVD_BEGIN 0xfd00000000ULL 32 #define ZEN_UMC_TOM2_RSVD_END 0x10000000000ULL 33 34 /* 35 * COD based hashing constants. 36 */ 37 #define ZEN_UMC_COD_NBITS 3 38 #define ZEN_UMC_NPS_MOD_NBITS 3 39 40 /* 41 * Enumeration that represents which parts of the NPS 1K/2K non-power of 2 hash 42 * we should use. These are ordered such their indexes correspond with the 43 * 'hashes' array indexes used in zen_umc_decode_ileave_nps_k_mod(). 44 */ 45 typedef enum { 46 ZEN_UMC_NP2_K_HASH_8 = 0, 47 ZEN_UMC_NP2_K_HASH_9, 48 ZEN_UMC_NP2_K_HASH_12, 49 ZEN_UMC_NP2_K_HASH_13 50 } zen_umc_np2_k_hash_t; 51 52 typedef struct { 53 /* 54 * Indicates what the type of this rule is. 55 */ 56 df_chan_ileave_t zukr_type; 57 /* 58 * This is the modulus that this rule uses. 59 */ 60 uint32_t zukr_mod; 61 /* 62 * Indicates that this rule requires socket interleaving. Otherwise we 63 * expect no socket interleaving to be enabled. 64 */ 65 boolean_t zukr_sock; 66 /* 67 * This is the 'high' portion of the original address that is used as 68 * part of the division and modulus logic when we take it. This bit is 69 * inclusive, e.g. a value of 12 indicates we want addr[64:12]. 
70 */ 71 uint32_t zukr_high; 72 /* 73 * This indicates at what point in the modulus address the high bits 74 * should arrive at. 75 */ 76 uint32_t zukr_mod_shift; 77 /* 78 * This indicates how we should fill the remaining bits in the modulus 79 * address. This is either zero filled or an original address bit. Only 80 * address bits 8 or 9 are ever used so we cheat and treat a zero here 81 * as zero filled. Only the first zukr_mod_shift bits will be 82 * considered. This and zukr_mod_shit are used prior to the modulus 83 * calculation. 84 */ 85 uint32_t zukr_mod_fill[5]; 86 /* 87 * The next series of values defines how to construct the channel. The 88 * channel is always made up of some number of bits from the modulus 89 * value and then optionally some of the hash bits. The first value 90 * indicates how many bits to shift the resulting modulus value by. Any 91 * bit that it is shifted over by must be filled by a hashed value. The 92 * indication of which hash bit is indicated by its starting address 93 * number. 94 */ 95 uint32_t zukr_chan_mod_shift; 96 zen_umc_np2_k_hash_t zukr_chan_fill[2]; 97 /* 98 * Next, it's time to describe how to construct the normalized address. 99 * There is a portion of it which is divided by the modulus. This is 100 * always going to be the high bits, but sometimes includes additional 101 * lower parts of the physical address ORed in. The first value 102 * indicates how many consecutive address bits should be included. The 103 * second indicates the starting address. 104 */ 105 uint32_t zukr_div_addr; 106 uint32_t zukr_div_naddr; 107 /* 108 * Finally the middle portion of the normalized address. 
109 */ 110 uint32_t zukr_norm_addr; 111 uint32_t zukr_norm_naddr; 112 } zen_umc_np2_k_rule_t; 113 114 const zen_umc_np2_k_rule_t zen_umc_np2_k_rules[] = { { 115 .zukr_type = DF_CHAN_ILEAVE_NPS4_3CH_1K, 116 .zukr_mod = 3, 117 .zukr_high = 12, 118 .zukr_mod_shift = 2, 119 .zukr_mod_fill = { 8, 9 }, 120 .zukr_chan_mod_shift = 0, 121 .zukr_div_addr = 8, 122 .zukr_div_naddr = 2, 123 .zukr_norm_addr = 10, 124 .zukr_norm_naddr = 2 125 }, { 126 .zukr_type = DF_CHAN_ILEAVE_NPS4_3CH_2K, 127 .zukr_mod = 3, 128 .zukr_high = 12, 129 .zukr_mod_shift = 2, 130 .zukr_mod_fill = { 0, 8 }, 131 .zukr_chan_mod_shift = 0, 132 .zukr_div_addr = 8, 133 .zukr_div_naddr = 1, 134 .zukr_norm_addr = 9, 135 .zukr_norm_naddr = 3 136 }, { 137 .zukr_type = DF_CHAN_ILEAVE_NPS2_6CH_1K, 138 .zukr_mod = 3, 139 .zukr_high = 12, 140 .zukr_mod_shift = 2, 141 .zukr_mod_fill = { 0, 9 }, 142 .zukr_chan_mod_shift = 1, 143 .zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8 }, 144 .zukr_div_addr = 9, 145 .zukr_div_naddr = 1, 146 .zukr_norm_addr = 10, 147 .zukr_norm_naddr = 2 148 }, { 149 .zukr_type = DF_CHAN_ILEAVE_NPS2_6CH_2K, 150 .zukr_mod = 3, 151 .zukr_high = 12, 152 .zukr_mod_shift = 2, 153 .zukr_mod_fill = { 0, 0 }, 154 .zukr_chan_mod_shift = 1, 155 .zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8 }, 156 .zukr_div_naddr = 0, 157 .zukr_norm_addr = 9, 158 .zukr_norm_naddr = 3 159 }, { 160 .zukr_type = DF_CHAN_ILEAVE_NPS1_12CH_1K, 161 .zukr_mod = 3, 162 .zukr_high = 12, 163 .zukr_mod_shift = 2, 164 .zukr_mod_fill = { 0, 0 }, 165 .zukr_chan_mod_shift = 2, 166 .zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8, ZEN_UMC_NP2_K_HASH_9 }, 167 .zukr_div_naddr = 0, 168 .zukr_norm_addr = 10, 169 .zukr_norm_naddr = 2 170 }, { 171 .zukr_type = DF_CHAN_ILEAVE_NPS1_12CH_1K, 172 .zukr_mod = 3, 173 .zukr_high = 12, 174 .zukr_mod_shift = 2, 175 .zukr_mod_fill = { 0, 0 }, 176 .zukr_chan_mod_shift = 2, 177 .zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8, ZEN_UMC_NP2_K_HASH_9 }, 178 .zukr_div_naddr = 0, 179 .zukr_norm_addr = 10, 180 .zukr_norm_naddr = 2 181 }, 
{ 182 .zukr_type = DF_CHAN_ILEAVE_NPS1_12CH_2K, 183 .zukr_mod = 3, 184 .zukr_high = 13, 185 .zukr_mod_shift = 3, 186 .zukr_mod_fill = { 0, 0, 0 }, 187 .zukr_chan_mod_shift = 2, 188 .zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8, ZEN_UMC_NP2_K_HASH_12 }, 189 .zukr_div_naddr = 0, 190 .zukr_norm_addr = 9, 191 .zukr_norm_naddr = 3 192 }, { 193 .zukr_type = DF_CHAN_ILEAVE_NPS0_24CH_1K, 194 .zukr_mod = 3, 195 .zukr_sock = B_TRUE, 196 .zukr_high = 13, 197 .zukr_mod_shift = 3, 198 .zukr_mod_fill = { 0, 0, 0 }, 199 .zukr_chan_mod_shift = 2, 200 .zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_9, ZEN_UMC_NP2_K_HASH_12 }, 201 .zukr_div_naddr = 0, 202 .zukr_norm_addr = 10, 203 .zukr_norm_naddr = 2 204 }, { 205 .zukr_type = DF_CHAN_ILEAVE_NPS0_24CH_2K, 206 .zukr_mod = 3, 207 .zukr_sock = B_TRUE, 208 .zukr_high = 14, 209 .zukr_mod_shift = 4, 210 .zukr_mod_fill = { 0, 0, 0, 0 }, 211 .zukr_chan_mod_shift = 2, 212 .zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_12, ZEN_UMC_NP2_K_HASH_13 }, 213 .zukr_div_naddr = 0, 214 .zukr_norm_addr = 9, 215 .zukr_norm_naddr = 3 216 }, { 217 .zukr_type = DF_CHAN_ILEAVE_NPS2_5CH_1K, 218 .zukr_mod = 5, 219 .zukr_high = 12, 220 .zukr_mod_shift = 2, 221 .zukr_mod_fill = { 8, 9 }, 222 .zukr_chan_mod_shift = 0, 223 .zukr_div_addr = 8, 224 .zukr_div_naddr = 2, 225 .zukr_norm_addr = 10, 226 .zukr_norm_naddr = 2 227 }, { 228 .zukr_type = DF_CHAN_ILEAVE_NPS2_5CH_2K, 229 .zukr_mod = 5, 230 .zukr_high = 12, 231 .zukr_mod_shift = 2, 232 .zukr_mod_fill = { 0, 8 }, 233 .zukr_chan_mod_shift = 0, 234 .zukr_div_addr = 8, 235 .zukr_div_naddr = 1, 236 .zukr_norm_addr = 9, 237 .zukr_norm_naddr = 3 238 }, { 239 .zukr_type = DF_CHAN_ILEAVE_NPS1_10CH_1K, 240 .zukr_mod = 5, 241 .zukr_high = 12, 242 .zukr_mod_shift = 2, 243 .zukr_mod_fill = { 0, 9 }, 244 .zukr_chan_mod_shift = 1, 245 .zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8 }, 246 .zukr_div_addr = 9, 247 .zukr_div_naddr = 1, 248 .zukr_norm_addr = 10, 249 .zukr_norm_naddr = 2 250 }, { 251 .zukr_type = DF_CHAN_ILEAVE_NPS1_10CH_2K, 252 .zukr_mod = 5, 
253 .zukr_high = 12, 254 .zukr_mod_shift = 2, 255 .zukr_mod_fill = { 0, 0 }, 256 .zukr_chan_mod_shift = 1, 257 .zukr_chan_fill = { ZEN_UMC_NP2_K_HASH_8 }, 258 .zukr_div_naddr = 0, 259 .zukr_norm_addr = 9, 260 .zukr_norm_naddr = 3 261 } }; 262 263 /* 264 * We want to apply some initial heuristics to determine if a physical address 265 * is DRAM before we proceed because of the MMIO hole and related. The DRAM 266 * ranges can overlap with these system reserved ranges so we have to manually 267 * check these. Effectively this means that we have a few valid ranges: 268 * 269 * o [ 0, TOM ) 270 * o [ 4 GiB, TOM2 ) 271 * 272 * However, the above 4 GiB runs into trouble depending on size. There is a 12 273 * GiB system reserved address region right below 1 TiB. So it really turns 274 * into the following when we have more than 1 TiB of DRAM: 275 * 276 * o [ 0, TOM ) 277 * o [ 4 GiB, 1 TiB - 12 GiB ) 278 * o [ 1 TiB, TOM2 ) 279 * 280 * Note, this does not currently scan MTRRs or MMIO rules for what might be 281 * redirected to MMIO. 282 */ 283 static boolean_t 284 zen_umc_decode_is_dram(const zen_umc_t *umc, zen_umc_decoder_t *dec) 285 { 286 if (dec->dec_pa < umc->umc_tom) { 287 return (B_TRUE); 288 } 289 290 if (dec->dec_pa >= umc->umc_tom2) { 291 dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM; 292 return (B_FALSE); 293 } 294 295 /* 296 * If the address is in the reserved hole around 1 TiB, do not proceed. 297 */ 298 if (dec->dec_pa >= ZEN_UMC_TOM2_RSVD_BEGIN && 299 dec->dec_pa < ZEN_UMC_TOM2_RSVD_END) { 300 dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM; 301 return (B_FALSE); 302 } 303 304 /* 305 * Now that we've validated we're not in the hole, check to see if we're 306 * actually in a valid region for TOM2. 
307 */ 308 if (dec->dec_pa >= ZEN_UMC_TOM2_START && 309 dec->dec_pa < umc->umc_tom2) { 310 return (B_TRUE); 311 } 312 313 /* 314 * At this point we have eliminated all known DRAM regions described by 315 * TOM and TOM2, so we have to conclude that whatever we're looking at 316 * is now not part of DRAM. 317 */ 318 dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM; 319 return (B_FALSE); 320 } 321 322 /* 323 * In our first stop on decoding, we need to go through and take a physical 324 * address and figure out what the corresponding initial DF rule that applies 325 * is. This rule will then be used to figure out which target on the data fabric 326 * we should be going to and what interleaving rules apply. 327 * 328 * Our DRAM rule may reflect that the DRAM hole is active. In this case the 329 * specified range in the rule will be larger than the actual amount of DRAM 330 * present. MMIO accesses take priority over DRAM accesses in the core and 331 * therefore the MMIO portion of the rule is not actually decoded. When trying 332 * to match a rule we do not need to worry about that and can just look whether 333 * our physical address matches a rule. We will take into account whether 334 * hoisting should adjust the address when we translate from a system address to 335 * a normal address (e.g. an address in the channel) which will be done in a 336 * subsequent step. If an address is in the hole, that has already been 337 * accounted for. 338 * 339 * While gathering information, we have all the DRAM rules for a given CCM that 340 * corresponds to a CPU core. This allows us to review all DRAM rules in one 341 * place rather than walking through what's been assigned to each UMC instance, 342 * which only has the rules that are directed towards that particular channel 343 * and matter for determining channel offsets. 
 */
static boolean_t
zen_umc_decode_find_df_rule(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	/*
	 * All CCMs see the same DRAM rules, so we only need to walk the rules
	 * of the first DF instance.
	 */
	const zen_umc_df_t *df = &umc->umc_dfs[0];

	for (uint_t i = 0; i < df->zud_dram_nrules; i++) {
		const df_dram_rule_t *rule = &df->zud_rules[i];

		/*
		 * If this rule is not enabled, skip it.
		 */
		if ((rule->ddr_flags & DF_DRAM_F_VALID) == 0)
			continue;

		/* Base is inclusive, limit is exclusive. */
		if (dec->dec_pa >= rule->ddr_base &&
		    dec->dec_pa < rule->ddr_limit) {
			dec->dec_df_ruleno = i;
			dec->dec_df_rule = rule;
			dec->dec_df_rulesrc = df;
			return (B_TRUE);
		}
	}

	dec->dec_fail = ZEN_UMC_DECODE_F_NO_DF_RULE;
	return (B_FALSE);
}

/*
 * This function takes care of the common logic of adjusting an address by the
 * base value in the rule and determining if we need to apply the DRAM hole or
 * not. This function is used in two different places:
 *
 *  o As part of adjusting the system address to construct the interleave
 *    address for DFv4 and Zen 3 based 6-channel hashing (see
 *    zen_umc_determine_ileave_addr() below).
 *  o As part of adjusting the system address at the beginning of normalization
 *    to a channel address.
 *
 * One thing to highlight is that the same adjustment we make in the first case
 * applies to a subset of things for interleaving; however, it applies to
 * everything when normalizing.
 */
static boolean_t
zen_umc_adjust_dram_addr(const zen_umc_t *umc, zen_umc_decoder_t *dec,
    uint64_t *addrp, zen_umc_decode_failure_t errno)
{
	const uint64_t init_addr = *addrp;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_df_t *df = dec->dec_df_rulesrc;
	uint64_t mod_addr = init_addr;

	/* The caller must have matched this rule for the address already. */
	ASSERT3U(init_addr, >=, rule->ddr_base);
	ASSERT3U(init_addr, <, rule->ddr_limit);
	mod_addr -= rule->ddr_base;

	/*
	 * Determine if the hole applies to this rule. Hoisting only matters
	 * for addresses at or above 4 GiB (TOM2's start).
	 */
	if ((rule->ddr_flags & DF_DRAM_F_HOLE) != 0 &&
	    (df->zud_flags & ZEN_UMC_DF_F_HOLE_VALID) != 0 &&
	    init_addr >= ZEN_UMC_TOM2_START) {
		uint64_t hole_size;
		hole_size = ZEN_UMC_TOM2_START -
		    umc->umc_dfs[0].zud_hole_base;
		/*
		 * If subtracting the hole would underflow the rule-relative
		 * address, this address cannot be decoded; report the
		 * caller-provided failure code along with the rule number.
		 */
		if (mod_addr < hole_size) {
			dec->dec_fail = errno;
			dec->dec_fail_data = dec->dec_df_ruleno;
			return (B_FALSE);
		}

		mod_addr -= hole_size;
	}

	*addrp = mod_addr;
	return (B_TRUE);
}

/*
 * Take care of constructing the address we need to use for determining the
 * interleaving target fabric id. See the big theory statement in zen_umc.c for
 * more on this.
 */
static boolean_t
zen_umc_determine_ileave_addr(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;

	/*
	 * Only DFv4 (pre-4D2) rules and the Zen 3 6-channel hash interleave
	 * on the adjusted address; everything else interleaves on the raw PA.
	 */
	if ((umc->umc_df_rev <= DF_REV_3 &&
	    rule->ddr_chan_ileave != DF_CHAN_ILEAVE_6CH) ||
	    umc->umc_df_rev >= DF_REV_4D2) {
		dec->dec_ilv_pa = dec->dec_pa;
		return (B_TRUE);
	}

	dec->dec_ilv_pa = dec->dec_pa;
	if (!zen_umc_adjust_dram_addr(umc, dec, &dec->dec_ilv_pa,
	    ZEN_UMC_DECODE_F_ILEAVE_UNDERFLOW)) {
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * This is a simple interleaving case where we simply extract bits. No hashing
 * required! Per zen_umc.c, from lowest to highest, we have channel, die, and
 * then socket bits.
 */
static boolean_t
zen_umc_decode_ileave_nohash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit, ndie_bit, nsock_bit, addr_bit;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	nsock_bit = rule->ddr_sock_ileave_bits;
	ndie_bit = rule->ddr_die_ileave_bits;
	/* Map the channel interleave type to its number of channel bits. */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
		nchan_bit = 0;
		break;
	case DF_CHAN_ILEAVE_2CH:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_4CH:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_8CH:
		nchan_bit = 3;
		break;
	case DF_CHAN_ILEAVE_16CH:
		nchan_bit = 4;
		break;
	case DF_CHAN_ILEAVE_32CH:
		nchan_bit = 5;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * Zero all of these out in case no bits are dedicated to this purpose.
	 * In those cases, then the value for this is always zero.
	 */
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	addr_bit = rule->ddr_addr_start;
	if (nchan_bit > 0) {
		dec->dec_ilv_chan = bitx64(dec->dec_ilv_pa,
		    addr_bit + nchan_bit - 1, addr_bit);
		addr_bit += nchan_bit;
	}

	if (ndie_bit > 0) {
		dec->dec_ilv_die = bitx64(dec->dec_ilv_pa,
		    addr_bit + ndie_bit - 1, addr_bit);
		addr_bit += ndie_bit;
	}

	if (nsock_bit > 0) {
		dec->dec_ilv_sock = bitx64(dec->dec_ilv_pa,
		    addr_bit + nsock_bit - 1, addr_bit);
		addr_bit += nsock_bit;
	}

	return (B_TRUE);
}

/*
 * Perform the Zen 2/Zen 3 "COD" based hashing. See the zen_umc.c interleaving
 * section of the big theory statement for an overview of how this works.
 */
static boolean_t
zen_umc_decode_ileave_cod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	/*
	 * The order of bits here is defined by AMD. Yes, we do use the rule's
	 * address bit first and then skip to bit 12 for the second hash bit.
	 */
	const uint32_t addr_bits[3] = { rule->ddr_addr_start, 12, 13 };

	/* COD hashing never combines with socket or die interleaving. */
	if (rule->ddr_sock_ileave_bits != 0 || rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_COD_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_COD4_2CH:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_COD2_4CH:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_COD1_8CH:
		nchan_bit = 3;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;

	/*
	 * Proceed to calculate the address hash based on the number of bits
	 * that we have been told to use based on the DF rule. Use the flags in
	 * the rule to determine which additional address ranges to hash in.
	 */
	for (uint_t i = 0; i < nchan_bit; i++) {
		uint8_t hash = 0;

		hash = bitx64(dec->dec_ilv_pa, addr_bits[i], addr_bits[i]);
		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 16 + i, 16 + i);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 21 + i, 21 + i);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 30 + i, 30 + i);
			hash ^= val;
		}

		dec->dec_ilv_chan |= hash << i;
	}

	return (B_TRUE);
}

/*
 * Common logic to perform hashing across the NPS, NPS 1K, and 2K variants.
 */
static void
zen_umc_decode_ileave_nps_common(zen_umc_decoder_t *dec,
    const uint32_t *addr_bits, const uint32_t *adj, uint32_t nsock_bits,
    uint32_t nchan_bits, boolean_t df4p0)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;

	for (uint32_t i = 0; i < nchan_bits + nsock_bits; i++) {
		uint8_t hash = 0;

		/*
		 * Start from the designated address bit and then XOR in each
		 * optional hash range the rule enables, offset by the
		 * caller-provided per-bit adjustment.
		 */
		hash = bitx64(dec->dec_ilv_pa, addr_bits[i], addr_bits[i]);
		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 16 + adj[i],
			    16 + adj[i]);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 21 + adj[i],
			    21 + adj[i]);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 30 + adj[i], 30 +
			    adj[i]);
			hash ^= val;
		}

		/*
		 * While 1T is only supported in the NPS 1K/2K variant, rule
		 * normalization means this won't be set in the plain NPS case.
		 */
		if ((rule->ddr_flags & DF_DRAM_F_HASH_40_42) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 40 + adj[i],
			    40 + adj[i]);
			hash ^= val;
		}

		/*
		 * If this is the first bit and we're not doing socket
		 * interleaving, then we need to add bit 14 to the running
		 * hash. This is only true for a strict DF v4.0 NPS style hash.
		 * We don't perform this for the 1K/2K variant.
		 */
		if (i == 0 && nsock_bits == 0 && df4p0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 14, 14);
			hash ^= val;
		}

		/*
		 * If socket interleaving is going on we need to store the
		 * first bit as the socket hash and then redirect the remaining
		 * bits to the channel, taking into account that the shift will
		 * be adjusted as a result.
		 */
		if (nsock_bits > 0) {
			if (i == 0) {
				dec->dec_ilv_sock = hash;
			} else {
				dec->dec_ilv_chan |= hash << (i - 1);
			}
		} else {
			dec->dec_ilv_chan |= hash << i;
		}
	}
}


/*
 * This implements the standard NPS hash for power of 2 based channel
 * configurations that is found in DFv4. For more information, please see the
 * interleaving portion of the zen_umc.c big theory statement.
 */
static boolean_t
zen_umc_decode_ileave_nps(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit, nsock_bit;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	/*
	 * The order of bits here is defined by AMD. Yes, this is start with
	 * the defined address bit and then skip to bit 12.
	 */
	const uint32_t addr_bits[4] = { rule->ddr_addr_start, 12, 13, 14 };
	const uint32_t adj[4] = { 0, 1, 2, 3 };

	/* NPS hashing does not support die interleaving. */
	if (rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	nsock_bit = rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_2CH:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_NPS2_4CH:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_NPS1_8CH:
		nchan_bit = 3;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	ASSERT3U(nchan_bit + nsock_bit, <=, 4);
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;

	zen_umc_decode_ileave_nps_common(dec, addr_bits, adj, nsock_bit,
	    nchan_bit, B_TRUE);
	return (B_TRUE);
}

/*
 * This implements the Zen 5 (really DF 4D2) NPS variants that work on both 1K
 * and 2K hashing.
 */
static boolean_t
zen_umc_decode_ileave_nps_k(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit, nsock_bit;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	/*
	 * The 1K variant hashes an extra low bit (9); the 2K variant skips it,
	 * which is why its adjustment array jumps from 0 to 2.
	 */
	const uint32_t addr_bits_1k[5] = { rule->ddr_addr_start, 9, 12, 13,
	    14 };
	const uint32_t addr_bits_2k[4] = { rule->ddr_addr_start, 12, 13, 14 };
	const uint32_t adj_1k[5] = { 0, 1, 2, 3, 4 };
	const uint32_t adj_2k[4] = { 0, 2, 3, 4 };
	const uint32_t *addr_bits;
	const uint32_t *adj;

	/* No die interleaving, and these rules always start at bit 8. */
	if (rule->ddr_die_ileave_bits != 0 || rule->ddr_addr_start != 8) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	nsock_bit = rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
		nchan_bit = 3;
		break;
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
		nchan_bit = 4;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/* Select the 1K or 2K bit set for the common hash. */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
		ASSERT3U(nchan_bit + nsock_bit, <=, 5);
		addr_bits = addr_bits_1k;
		adj = adj_1k;
		break;
	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
		ASSERT3U(nchan_bit + nsock_bit, <=, 4);
		addr_bits = addr_bits_2k;
		adj = adj_2k;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	zen_umc_decode_ileave_nps_common(dec, addr_bits, adj, nsock_bit,
	    nchan_bit, B_FALSE);
	return (B_TRUE);
}

/*
 * This implements the logic to perform the Zen 3 6ch special hash. It's worth
 * calling out that unlike all other hash functions, this does not support the
 * use of the DF_DRAM_F_HASH_16_18 flag.
 */
static void
zen_umc_decode_hash_zen3_6ch(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[3])
{
	uint32_t addr_bit = rule->ddr_addr_start;
	/*
	 * Yes, we use these in a weird order. No, there is no 64K.
	 */
	const uint32_t bits_2M[3] = { 23, 21, 22 };
	const uint32_t bits_1G[3] = { 32, 30, 31 };

	hashes[0] = hashes[1] = hashes[2] = 0;
	for (uint_t i = 0; i < ZEN_UMC_COD_NBITS; i++) {
		hashes[i] = bitx64(pa, addr_bit + i, addr_bit + i);
		/* The first hash bit additionally folds in addr_bit + 3. */
		if (i == 0) {
			uint8_t val = bitx64(pa, addr_bit + 3, addr_bit + 3);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(pa, bits_2M[i], bits_2M[i]);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(pa, bits_1G[i], bits_1G[i]);
			hashes[i] ^= val;
		}
	}
}

/*
 * Perform Zen 3 6-channel hashing. This is pretty weird compared to others.
 * See the zen_umc.c big theory statement for the thorny details.
 */
static boolean_t
zen_umc_decode_ileave_zen3_6ch(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t hashes[3] = { 0 };
	const df_dram_rule_t *rule = dec->dec_df_rule;
	uint32_t addr_bit = rule->ddr_addr_start;

	/* The 6-channel hash never combines with socket/die interleaving. */
	if (rule->ddr_sock_ileave_bits != 0 || rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_COD_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	zen_umc_decode_hash_zen3_6ch(rule, dec->dec_ilv_pa, hashes);
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	dec->dec_ilv_chan = hashes[0];
	/*
	 * When both upper hash bits are set, the upper channel bits instead
	 * come from the remaining address modulo 3 (there are only 6
	 * channels, not 8).
	 */
	if (hashes[1] == 1 && hashes[2] == 1) {
		uint64_t mod_addr = dec->dec_ilv_pa >> (addr_bit + 3);
		dec->dec_ilv_chan |= (mod_addr % 3) << 1;
	} else {
		dec->dec_ilv_chan |= hashes[1] << 1;
		dec->dec_ilv_chan |= hashes[2] << 2;
	}

	return (B_TRUE);
}

/*
 * This is the standard hash function for the non-power of two based NPS
 * hashes. See the big theory statement for more information. Unlike the normal
 * NPS hash which uses bit 14 conditionally based on socket interleaving, here
 * it is always used.
 */
static void
zen_umc_decode_hash_nps_mod(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[3])
{
	const uint32_t addr_bits[3] = { rule->ddr_addr_start, 12, 13 };

	for (uint_t i = 0; i < ZEN_UMC_NPS_MOD_NBITS; i++) {
		hashes[i] = bitx64(pa, addr_bits[i], addr_bits[i]);
		/* Bit 14 is always folded into the first hash bit here. */
		if (i == 0) {
			uint8_t val = bitx64(pa, 14, 14);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(pa, 16 + i, 16 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(pa, 21 + i, 21 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(pa, 30 + i, 30 + i);
			hashes[i] ^= val;
		}
	}
}

/*
 * The NPS 1K/2K non-power of 2 variant of the hash above: it adds address bit
 * 9 as a hashed bit and supports the 1T (bits 40-42) hash range.
 */
static void
zen_umc_decode_hash_nps_k_mod(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[4])
{
	const uint32_t addr_bits[4] = { rule->ddr_addr_start, 9, 12, 13 };

	for (size_t i = 0; i < ARRAY_SIZE(addr_bits); i++) {
		hashes[i] = bitx64(pa, addr_bits[i], addr_bits[i]);
		/* As above, bit 14 always folds into the first hash bit. */
		if (i == 0) {
			uint8_t val = bitx64(pa, 14, 14);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(pa, 16 + i, 16 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(pa, 21 + i, 21 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(pa, 30 + i, 30 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_40_42) != 0) {
			uint8_t val = bitx64(pa, 40 + i, 40 + i);
			hashes[i] ^= val;
		}
	}
}

/*
 * See the big theory statement in zen_umc.c which describes the rules for this
 * computation. This is a little less weird than the Zen 3 one, but still,
 * unique.
 */
static boolean_t
zen_umc_decode_ileave_nps_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t hashes[3] = { 0 };
	uint32_t nsock_bit, chan_mod;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	/* NPS modulus hashing does not support die interleaving. */
	if (rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	nsock_bit = rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
		chan_mod = 3;
		break;
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		chan_mod = 5;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	zen_umc_decode_hash_nps_mod(rule, dec->dec_ilv_pa, hashes);

	/* With socket interleaving, the first hash bit selects the socket. */
	if (nsock_bit > 0) {
		ASSERT3U(nsock_bit, ==, 1);
		dec->dec_ilv_sock = hashes[0];
	}

	/*
	 * The base channel is the modulus of addr[63:14]; the first hash bit
	 * rotates it by one within the modulus.
	 */
	dec->dec_ilv_chan = bitx64(dec->dec_ilv_pa, 63, 14) % chan_mod;
	if (hashes[0] == 1) {
		dec->dec_ilv_chan = (dec->dec_ilv_chan + 1) % chan_mod;
	}

	/*
	 * Use the remaining hash bits based on the number of channels. There
	 * is nothing else to do for 3/5 channel configs.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
		break;
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		dec->dec_ilv_chan += hashes[2] * chan_mod;
		break;
	case DF_CHAN_ILEAVE_NPS1_12CH:
		dec->dec_ilv_chan += ((hashes[2] << 1) | hashes[1]) * chan_mod;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Determine the interleave address for the NPS 1K/2K non-power of 2 based
 * values. Each of these uses a similar style of calculation with rather
 * different values and as such we use a data table for each of these that maps
 * to a given rule.
 */
static boolean_t
zen_umc_decode_ileave_nps_k_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t hashes[4] = { 0 };
	uint32_t chan, mod_val;
	uint64_t mod_addr;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_np2_k_rule_t *np2 = NULL;

	/* Find the table entry matching this rule's interleave type. */
	for (size_t i = 0; i < ARRAY_SIZE(zen_umc_np2_k_rules); i++) {
		if (rule->ddr_chan_ileave == zen_umc_np2_k_rules[i].zukr_type) {
			np2 = &zen_umc_np2_k_rules[i];
			break;
		}
	}

	if (np2 == NULL) {
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/* No die interleaving, and these rules always start at bit 8. */
	if (rule->ddr_die_ileave_bits != 0 || rule->ddr_addr_start != 8) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	/*
	 * These rules either require that socket interleaving is enabled or
	 * not. Make sure that this matches before we proceed.
	 */
	if (np2->zukr_sock != (rule->ddr_sock_ileave_bits == 1)) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	zen_umc_decode_hash_nps_k_mod(rule, dec->dec_ilv_pa, hashes);
	if (rule->ddr_sock_ileave_bits > 0) {
		ASSERT3U(rule->ddr_sock_ileave_bits, ==, 1);
		dec->dec_ilv_sock = hashes[0];
	}

	/*
	 * Build the modulus address: the high bits shifted up, with the low
	 * zukr_mod_shift bits either zero filled or taken from the original
	 * address bits named in zukr_mod_fill.
	 */
	mod_addr = bitx64(dec->dec_ilv_pa, 63, np2->zukr_high);
	mod_addr = mod_addr << np2->zukr_mod_shift;
	for (uint32_t i = 0; i < np2->zukr_mod_shift; i++) {
		uint32_t bit = np2->zukr_mod_fill[i];
		if (bit != 0) {
			uint64_t val = bitx64(dec->dec_ilv_pa, bit, bit);
			mod_addr = bitset64(mod_addr, i, i, val);
		}
	}

	/*
	 * The channel is the modulus value shifted up, with the freed low bits
	 * filled from the designated hash bits.
	 */
	mod_val = (uint32_t)(mod_addr % np2->zukr_mod);
	chan = mod_val << np2->zukr_chan_mod_shift;
	for (uint32_t i = 0; i < np2->zukr_chan_mod_shift; i++) {
		VERIFY3U(np2->zukr_chan_fill[i], <, ARRAY_SIZE(hashes));
		uint32_t bit = np2->zukr_chan_fill[i];
		uint32_t val = hashes[np2->zukr_chan_fill[i]];
		chan = bitset32(chan, bit, bit, val);
	}

	dec->dec_ilv_chan = chan;
	return (B_TRUE);
}

/*
 * Our next task is to attempt to translate the PA and the DF rule from a
 * system address into a normalized address and a particular DRAM channel that
 * it's targeting. There are several things that we need to take into account
 * here when performing interleaving and translation:
 *
 *  o The DRAM Hole modifying our base address
 *  o The various interleave bits
 *  o Potentially hashing based on channel and global settings
 *  o Potential CS re-targeting registers (only on some systems)
 *  o Finally, the question of how to adjust for the DRAM hole and the base
 *    address changes based on the DF generation and channel configuration.
 *    This influences what address we start interleaving with.
 *
 * Note, this phase does not actually construct the normalized (e.g. channel)
 * address. That's done in a subsequent step. For more background, please see
 * the 'Data Fabric Interleaving' section of the zen_umc.c big theory statement.
 */
static boolean_t
zen_umc_decode_sysaddr_to_csid(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t sock, die, chan, remap_ruleset;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_cs_remap_t *remap;

	/*
	 * First, we must determine what the actual address used for
	 * interleaving is. This varies based on the interleaving and DF
	 * generation.
	 */
	if (!zen_umc_determine_ileave_addr(umc, dec)) {
		return (B_FALSE);
	}

	/* Dispatch to the decoder that matches this rule's interleave mode. */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
	case DF_CHAN_ILEAVE_2CH:
	case DF_CHAN_ILEAVE_4CH:
	case DF_CHAN_ILEAVE_8CH:
	case DF_CHAN_ILEAVE_16CH:
	case DF_CHAN_ILEAVE_32CH:
		if (!zen_umc_decode_ileave_nohash(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_COD4_2CH:
	case DF_CHAN_ILEAVE_COD2_4CH:
	case DF_CHAN_ILEAVE_COD1_8CH:
		if (!zen_umc_decode_ileave_cod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_2CH:
	case DF_CHAN_ILEAVE_NPS2_4CH:
	case DF_CHAN_ILEAVE_NPS1_8CH:
		if (!zen_umc_decode_ileave_nps(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_6CH:
		if (!zen_umc_decode_ileave_zen3_6ch(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		if (!zen_umc_decode_ileave_nps_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
		if (!zen_umc_decode_ileave_nps_k(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH_1K:
	case DF_CHAN_ILEAVE_NPS2_6CH_1K:
	case DF_CHAN_ILEAVE_NPS1_12CH_1K:
	case DF_CHAN_ILEAVE_NPS0_24CH_1K:
	case DF_CHAN_ILEAVE_NPS2_5CH_1K:
	case DF_CHAN_ILEAVE_NPS1_10CH_1K:
	case DF_CHAN_ILEAVE_NPS4_3CH_2K:
	case DF_CHAN_ILEAVE_NPS2_6CH_2K:
	case DF_CHAN_ILEAVE_NPS1_12CH_2K:
	case DF_CHAN_ILEAVE_NPS0_24CH_2K:
	case DF_CHAN_ILEAVE_NPS2_5CH_2K:
	case DF_CHAN_ILEAVE_NPS1_10CH_2K:
		if (!zen_umc_decode_ileave_nps_k_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_MI3H_8CH:
	case DF_CHAN_ILEAVE_MI3H_16CH:
	case DF_CHAN_ILEAVE_MI3H_32CH:
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * At this point we have dealt with decoding the interleave into the
	 * logical elements that it contains. We need to transform that back
	 * into a fabric ID, so we can add it to the base fabric ID in our rule.
	 * After that, we need to see if there is any CS remapping going on. If
	 * there is, we will replace the component part of the decomposed fabric
	 * ID. With that done, we can then transform the components back into
	 * our target fabric ID, which indicates which UMC we're after.
	 */
	zen_fabric_id_compose(&umc->umc_decomp, dec->dec_ilv_sock,
	    dec->dec_ilv_die, dec->dec_ilv_chan, &dec->dec_ilv_fabid);
	dec->dec_log_fabid = dec->dec_ilv_fabid + rule->ddr_dest_fabid;

	/*
	 * If there's no remapping to do, then we're done. Simply assign the
	 * logical ID as our target.
	 */
	zen_fabric_id_decompose(&umc->umc_decomp, dec->dec_log_fabid, &sock,
	    &die, &chan);
	if ((rule->ddr_flags & DF_DRAM_F_REMAP_EN) == 0) {
		dec->dec_targ_fabid = dec->dec_log_fabid;
		return (B_TRUE);
	}

	/*
	 * The DF contains multiple remapping tables. We must figure out which
	 * of these to actually use. There are two different ways that this can
	 * work. The first way is the one added in DFv4 and is used since then.
	 * In that case, the DRAM rule includes both that remapping was enabled
	 * and which of the multiple mapping tables to use.
	 *
	 * This feature also exists prior to DFv4, but only in Milan. In that
	 * world, indicated by the DF_DRAM_F_REMAP_SOCK flag, there is one table
	 * in each DF per-socket. Based on the destination socket from the data
	 * fabric ID, you pick the actual table to use.
	 *
	 * Once the table has been selected, we maintain the socket and die
	 * portions of the fabric ID as constants and replace the component with
	 * the one the remapping table indicates.
	 *
	 * Technically each DF has its own copy of the remapping tables. To make
	 * this work we rely on the following assumption: a given DF node has to
	 * be able to fully route all DRAM rules to a target. That is, a given
	 * DF node doesn't really forward a system address to the remote die for
	 * further interleave processing and therefore we must have enough
	 * information here to map it totally from the same DF that we got the
	 * CCM rules from in the first place, DF 0.
	 */
	if ((rule->ddr_flags & DF_DRAM_F_REMAP_SOCK) != 0) {
		remap_ruleset = sock;
	} else {
		remap_ruleset = rule->ddr_remap_ent;
	}

	if (remap_ruleset >= dec->dec_df_rulesrc->zud_cs_nremap) {
		dec->dec_fail = ZEN_UMC_DECODE_F_BAD_REMAP_SET;
		dec->dec_fail_data = remap_ruleset;
		return (B_FALSE);
	}

	remap = &dec->dec_df_rulesrc->zud_remap[remap_ruleset];
	if (chan >= remap->csr_nremaps) {
		dec->dec_fail = ZEN_UMC_DECODE_F_BAD_REMAP_ENTRY;
		dec->dec_fail_data = chan;
		return (B_FALSE);
	}

	dec->dec_remap_comp = remap->csr_remaps[chan];
	if ((dec->dec_remap_comp & ~umc->umc_decomp.dfd_comp_mask) != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_REMAP_HAS_BAD_COMP;
		dec->dec_fail_data = dec->dec_remap_comp;
		return (B_FALSE);
	}

	zen_fabric_id_compose(&umc->umc_decomp, sock, die, dec->dec_remap_comp,
	    &dec->dec_targ_fabid);

	return (B_TRUE);
}

/*
 * Our next step here is to actually take our target ID and find the
 * corresponding DF, UMC, and actual rule that was used. Note, we don't
 * decompose the ID and look things up that way for a few reasons. While each
 * UMC should map linearly to its instance/component ID, there are suggestions
 * that they can be renumbered. This makes it simplest to just walk over
 * everything (and there aren't that many things to walk over either).
 */
static boolean_t
zen_umc_decode_find_umc_rule(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	for (uint_t dfno = 0; dfno < umc->umc_ndfs; dfno++) {
		const zen_umc_df_t *df = &umc->umc_dfs[dfno];
		for (uint_t umcno = 0; umcno < df->zud_nchan; umcno++) {
			const zen_umc_chan_t *chan = &df->zud_chan[umcno];

			if (chan->chan_fabid != dec->dec_targ_fabid) {
				continue;
			}

			/*
			 * At this point we have found the UMC that we were
			 * looking for.
			 * Snapshot that and then figure out which
			 * rule index of it corresponds to our mapping so we can
			 * properly determine an offset. We will still use the
			 * primary CCM rule for all other calculations.
			 */
			dec->dec_umc_chan = chan;
			for (uint32_t ruleno = 0; ruleno < chan->chan_nrules;
			    ruleno++) {
				const df_dram_rule_t *rule =
				    &chan->chan_rules[ruleno];
				if ((rule->ddr_flags & DF_DRAM_F_VALID) == 0) {
					continue;
				}

				/* Half-open range check: [base, limit). */
				if (dec->dec_pa >= rule->ddr_base &&
				    dec->dec_pa < rule->ddr_limit) {
					dec->dec_umc_ruleno = ruleno;
					return (B_TRUE);
				}
			}

			dec->dec_fail = ZEN_UMC_DECODE_F_UMC_DOESNT_HAVE_PA;
			return (B_FALSE);
		}
	}

	dec->dec_fail = ZEN_UMC_DECODE_F_CANNOT_MAP_FABID;
	return (B_FALSE);
}

/*
 * Non-hashing interleave modes system address normalization logic. See the
 * zen_umc.c big theory statement for more information.
 */
static boolean_t
zen_umc_decode_normalize_nohash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint_t nbits = 0;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	/* Total interleave bits to remove: socket + die + channel. */
	nbits += rule->ddr_sock_ileave_bits;
	nbits += rule->ddr_die_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
		break;
	case DF_CHAN_ILEAVE_2CH:
		nbits += 1;
		break;
	case DF_CHAN_ILEAVE_4CH:
		nbits += 2;
		break;
	case DF_CHAN_ILEAVE_8CH:
		nbits += 3;
		break;
	case DF_CHAN_ILEAVE_16CH:
		nbits += 4;
		break;
	case DF_CHAN_ILEAVE_32CH:
		nbits += 5;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * If we have a really simple configuration (e.g. no interleaving at
	 * all), then make sure that we do not actually do anything here.
	 */
	if (nbits > 0) {
		dec->dec_norm_addr = bitdel64(dec->dec_norm_addr,
		    rule->ddr_addr_start + nbits - 1, rule->ddr_addr_start);
	}

	return (B_TRUE);
}

/*
 * COD/NPS system address normalization logic. See the zen_umc.c big theory
 * statement for more information.
 */
static boolean_t
zen_umc_decode_normalize_hash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint_t nbits = 0, nstart = 0;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	/*
	 * NPS 1K hashes remove bits 8 and 9 first. Determine how many bits to
	 * remove from the starting location. This will later be reduced based
	 * upon how many address bits there actually are.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
		nstart = 2;
		break;
	default:
		nstart = 1;
		break;
	}

	/*
	 * NPS hashes allow for socket interleaving, COD hashes do not. Add
	 * socket interleaving, skip die.
1382 */ 1383 nbits += rule->ddr_sock_ileave_bits; 1384 switch (rule->ddr_chan_ileave) { 1385 case DF_CHAN_ILEAVE_COD4_2CH: 1386 case DF_CHAN_ILEAVE_NPS4_2CH: 1387 case DF_CHAN_ILEAVE_NPS4_2CH_1K: 1388 case DF_CHAN_ILEAVE_NPS4_2CH_2K: 1389 nbits += 1; 1390 break; 1391 case DF_CHAN_ILEAVE_COD2_4CH: 1392 case DF_CHAN_ILEAVE_NPS2_4CH: 1393 case DF_CHAN_ILEAVE_NPS2_4CH_1K: 1394 case DF_CHAN_ILEAVE_NPS2_4CH_2K: 1395 nbits += 2; 1396 break; 1397 case DF_CHAN_ILEAVE_COD1_8CH: 1398 case DF_CHAN_ILEAVE_NPS1_8CH: 1399 case DF_CHAN_ILEAVE_NPS1_8CH_1K: 1400 case DF_CHAN_ILEAVE_NPS1_8CH_2K: 1401 nbits += 3; 1402 break; 1403 case DF_CHAN_ILEAVE_NPS1_16CH_1K: 1404 case DF_CHAN_ILEAVE_NPS1_16CH_2K: 1405 nbits += 4; 1406 break; 1407 default: 1408 dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP; 1409 dec->dec_fail_data = rule->ddr_chan_ileave; 1410 } 1411 1412 /* 1413 * Don't remove more bits from the start than exist. 1414 */ 1415 if (nstart > nbits) { 1416 nstart = nbits; 1417 } 1418 1419 /* 1420 * Always remove high order bits before low order bits so we don't have 1421 * to adjust the bits we need to remove. 1422 */ 1423 if (nbits > nstart) { 1424 uint_t start = 12; 1425 uint_t end = start + (nbits - nstart - 1); 1426 dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, end, start); 1427 } 1428 1429 dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, 1430 rule->ddr_addr_start + nstart - 1, rule->ddr_addr_start); 1431 return (B_TRUE); 1432 } 1433 1434 /* 1435 * Now it's time to perform normalization of our favorite interleaving type. 1436 * Please see the comments in zen_umc.c on this to understand what we're doing 1437 * here and why. 1438 */ 1439 static boolean_t 1440 zen_umc_decode_normalize_zen3_6ch(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1441 { 1442 uint8_t hashes[3] = { 0 }; 1443 uint_t start, end; 1444 const df_dram_rule_t *rule = dec->dec_df_rule; 1445 1446 /* 1447 * As per the theory statement, we always remove the hash bits here from 1448 * the starting address. 
Because this is a 6-channel config, that turns 1449 * into 3. Perform the hash again first. 1450 */ 1451 zen_umc_decode_hash_zen3_6ch(rule, dec->dec_norm_addr, hashes); 1452 start = rule->ddr_addr_start; 1453 end = rule->ddr_addr_start + ZEN_UMC_COD_NBITS - 1; 1454 dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, end, start); 1455 1456 /* 1457 * This is the case the theory statement warned about. This gets 1458 * normalized to the top of the DIMM's range (its two upper most bits 1459 * are set). 1460 */ 1461 if (hashes[1] == 1 && hashes[2] == 1) { 1462 uint_t start = 14 - ZEN_UMC_COD_NBITS + 1463 dec->dec_umc_chan->chan_np2_space0; 1464 dec->dec_norm_addr = bitset64(dec->dec_norm_addr, start + 1, 1465 start, 0x3); 1466 } 1467 1468 return (B_TRUE); 1469 } 1470 1471 /* 1472 * Based on the algorithm of sorts described in zen_umc.c, we have a few 1473 * different phases of extraction and combination. This isn't quite like the 1474 * others where we simply delete bits. 1475 */ 1476 static boolean_t 1477 zen_umc_decode_normalize_nps_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1478 { 1479 uint64_t low, high, mid; 1480 uint_t nbits, chan_mod, sock_bits, nmid_bits; 1481 uint_t mid_start, mid_end; 1482 uint8_t hashes[3] = { 0 }; 1483 const df_dram_rule_t *rule = dec->dec_df_rule; 1484 1485 sock_bits = rule->ddr_sock_ileave_bits; 1486 switch (rule->ddr_chan_ileave) { 1487 case DF_CHAN_ILEAVE_NPS4_3CH: 1488 chan_mod = 3; 1489 nbits = 1; 1490 break; 1491 case DF_CHAN_ILEAVE_NPS2_5CH: 1492 chan_mod = 5; 1493 nbits = 1; 1494 break; 1495 case DF_CHAN_ILEAVE_NPS2_6CH: 1496 chan_mod = 3; 1497 nbits = 2; 1498 break; 1499 case DF_CHAN_ILEAVE_NPS1_10CH: 1500 chan_mod = 5; 1501 nbits = 2; 1502 break; 1503 case DF_CHAN_ILEAVE_NPS1_12CH: 1504 chan_mod = 3; 1505 nbits = 3; 1506 break; 1507 default: 1508 dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP; 1509 dec->dec_fail_data = rule->ddr_chan_ileave; 1510 return (B_FALSE); 1511 } 1512 1513 /* 1514 * First extract the low bit 
	 * range that we're using which is everything
	 * below the starting interleave address. We also always extract the
	 * high bits, which are always [63:14] and divide it by the modulus.
	 * Note, we apply the hash after any such division if needed. It becomes
	 * the new least significant bit.
	 */
	low = bitx64(dec->dec_norm_addr, rule->ddr_addr_start - 1, 0);
	high = bitx64(dec->dec_norm_addr, 63, 14) / chan_mod;
	zen_umc_decode_hash_nps_mod(rule, dec->dec_norm_addr, hashes);
	if (sock_bits == 0) {
		high = (high << 1) | hashes[0];
	}

	/*
	 * Now for the weirdest bit here, extracting the middle bits. Recall
	 * this hash uses bit 8, then 13, then 12 (the hash order is still 8,
	 * 12, 13, but it uses the hashes[2] before hashes[1] in
	 * zen_umc_decode_ileave_nps_mod()). So if we're only using 1 interleave
	 * bit, we just remove bit 8 (assuming that is our starting address) and
	 * our range is [13:9]. If we're using two, our range becomes [12:9],
	 * and if three, [11:9]. The 6 - nbits below comes from the fact that in
	 * a 1 bit interleave we have 5 bits. Because our mid_start/mid_end
	 * range is inclusive, we subtract one at the end from mid_end.
	 */
	nmid_bits = 6 - nbits;
	mid_start = rule->ddr_addr_start + 1;
	mid_end = mid_start + nmid_bits - 1;
	mid = bitx64(dec->dec_norm_addr, mid_end, mid_start);

	/*
	 * Because we've been removing bits, we don't use any of the start and
	 * ending ranges we calculated above for shifts, as that was what we
	 * needed from the original address.
	 */
	dec->dec_norm_addr = low | (mid << rule->ddr_addr_start) | (high <<
	    (rule->ddr_addr_start + nmid_bits));

	return (B_TRUE);
}

/*
 * Construct the normalized address for the NPS 1K/2K non-power of 2 instances.
 * See the theory statement for the rough formula used here. While each variant
 * uses slightly different values, that has been abstracted based on our data
 * table.
 */
static boolean_t
zen_umc_decode_normalize_nps_k_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint64_t high, mid, low;
	uint_t mid_end;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_np2_k_rule_t *np2 = NULL;

	/* Find the data table entry that describes this interleave type. */
	for (size_t i = 0; i < ARRAY_SIZE(zen_umc_np2_k_rules); i++) {
		if (rule->ddr_chan_ileave == zen_umc_np2_k_rules[i].zukr_type) {
			np2 = &zen_umc_np2_k_rules[i];
			break;
		}
	}

	if (np2 == NULL) {
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/* Low portion: everything below the starting interleave address. */
	low = bitx64(dec->dec_norm_addr, rule->ddr_addr_start - 1, 0);
	mid_end = np2->zukr_norm_addr + np2->zukr_norm_naddr - 1;
	VERIFY3U(mid_end, >=, rule->ddr_addr_start);
	mid = bitx64(dec->dec_norm_addr, mid_end, np2->zukr_norm_addr);

	/*
	 * High portion: addr[63:zukr_high], optionally with additional lower
	 * address bits ORed into the bottom, then divided by the modulus.
	 */
	high = bitx64(dec->dec_norm_addr, 63, np2->zukr_high);
	if (np2->zukr_div_naddr > 0) {
		uint_t ins_end = np2->zukr_div_addr + np2->zukr_div_naddr - 1;
		uint64_t insert = bitx64(dec->dec_norm_addr, ins_end,
		    np2->zukr_div_addr);

		high = high << np2->zukr_div_naddr;
		high = bitset64(high, np2->zukr_div_naddr - 1, 0, insert);
	}
	high = high / np2->zukr_mod;

	dec->dec_norm_addr = low | (mid << rule->ddr_addr_start) | (high <<
	    (rule->ddr_addr_start + np2->zukr_norm_naddr));
	return (B_TRUE);
}

/*
 * Now we need to go through and try to construct a normalized address using all
 * the information that we've gathered to date. To do this we need to take into
 * account all of the following transformations on the address that need to
 * occur. We apply modifications to the address in the following order:
 *
 *  o The base address of the rule
 *  o DRAM hole changes
 *  o Normalization of the address due to interleaving (more fun)
 *  o The DRAM offset register of the rule
 */
static boolean_t
zen_umc_decode_sysaddr_to_norm(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const zen_umc_chan_t *chan = dec->dec_umc_chan;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	dec->dec_norm_addr = dec->dec_pa;
	if (!zen_umc_adjust_dram_addr(umc, dec, &dec->dec_norm_addr,
	    ZEN_UMC_DECODE_F_CALC_NORM_UNDERFLOW)) {
		return (B_FALSE);
	}

	/*
	 * Now for the most annoying part of this whole thing, normalizing based
	 * on our actual interleave format. The reason for this is that when
	 * interleaving is going on, it actually is removing bits that are just
	 * being used to direct it somewhere; however, it's actually generally
	 * speaking the same value in each location. See the big theory
	 * statement in zen_umc.c for more information.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
	case DF_CHAN_ILEAVE_2CH:
	case DF_CHAN_ILEAVE_4CH:
	case DF_CHAN_ILEAVE_8CH:
	case DF_CHAN_ILEAVE_16CH:
	case DF_CHAN_ILEAVE_32CH:
		if (!zen_umc_decode_normalize_nohash(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_COD4_2CH:
	case DF_CHAN_ILEAVE_COD2_4CH:
	case DF_CHAN_ILEAVE_COD1_8CH:
	case DF_CHAN_ILEAVE_NPS4_2CH:
	case DF_CHAN_ILEAVE_NPS2_4CH:
	case DF_CHAN_ILEAVE_NPS1_8CH:
	case DF_CHAN_ILEAVE_NPS4_2CH_1K:
	case DF_CHAN_ILEAVE_NPS2_4CH_1K:
	case DF_CHAN_ILEAVE_NPS1_8CH_1K:
	case DF_CHAN_ILEAVE_NPS1_16CH_1K:
	case DF_CHAN_ILEAVE_NPS4_2CH_2K:
	case DF_CHAN_ILEAVE_NPS2_4CH_2K:
	case DF_CHAN_ILEAVE_NPS1_8CH_2K:
	case DF_CHAN_ILEAVE_NPS1_16CH_2K:
		if (!zen_umc_decode_normalize_hash(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_6CH:
		if (!zen_umc_decode_normalize_zen3_6ch(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		if (!zen_umc_decode_normalize_nps_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH_1K:
	case DF_CHAN_ILEAVE_NPS2_6CH_1K:
	case DF_CHAN_ILEAVE_NPS1_12CH_1K:
	case DF_CHAN_ILEAVE_NPS0_24CH_1K:
	case DF_CHAN_ILEAVE_NPS2_5CH_1K:
	case DF_CHAN_ILEAVE_NPS1_10CH_1K:
	case DF_CHAN_ILEAVE_NPS4_3CH_2K:
	case DF_CHAN_ILEAVE_NPS2_6CH_2K:
	case DF_CHAN_ILEAVE_NPS1_12CH_2K:
	case DF_CHAN_ILEAVE_NPS0_24CH_2K:
	case DF_CHAN_ILEAVE_NPS2_5CH_2K:
	case DF_CHAN_ILEAVE_NPS1_10CH_2K:
		if (!zen_umc_decode_normalize_nps_k_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_MI3H_8CH:
	case DF_CHAN_ILEAVE_MI3H_16CH:
	case
DF_CHAN_ILEAVE_MI3H_32CH: 1695 default: 1696 dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP; 1697 dec->dec_fail_data = rule->ddr_chan_ileave; 1698 return (B_FALSE); 1699 } 1700 1701 /* 1702 * Determine if this rule has an offset to apply. Note, there is never 1703 * an offset for rule 0, hence the index into this is one less than the 1704 * actual rule number. Unlike other transformations these offsets 1705 * describe the start of a normalized range. Therefore we need to 1706 * actually add this value instead of subtract. 1707 */ 1708 if (dec->dec_umc_ruleno > 0) { 1709 uint32_t offno = dec->dec_umc_ruleno - 1; 1710 const chan_offset_t *offset = &chan->chan_offsets[offno]; 1711 1712 if (offset->cho_valid) { 1713 dec->dec_norm_addr += offset->cho_offset; 1714 } 1715 } 1716 1717 return (B_TRUE); 1718 } 1719 1720 /* 1721 * This applies the formula that determines a chip-select actually matches which 1722 * is defined as (address & ~mask) == (base & ~mask) in the PPR. There is both a 1723 * primary and secondary mask here. We need to pay attention to which is used 1724 * (if any) for later on. 1725 */ 1726 static boolean_t 1727 zen_umc_decoder_cs_matches(const umc_cs_t *cs, const uint64_t norm, 1728 boolean_t *matched_sec) 1729 { 1730 if (cs->ucs_base.udb_valid != 0) { 1731 uint64_t imask = ~cs->ucs_base_mask; 1732 if ((norm & imask) == (cs->ucs_base.udb_base & imask)) { 1733 *matched_sec = B_FALSE; 1734 return (B_TRUE); 1735 } 1736 } 1737 1738 if (cs->ucs_sec.udb_valid != 0) { 1739 uint64_t imask = ~cs->ucs_sec_mask; 1740 if ((norm & imask) == (cs->ucs_sec.udb_base & imask)) { 1741 *matched_sec = B_TRUE; 1742 return (B_TRUE); 1743 } 1744 } 1745 1746 return (B_FALSE); 1747 } 1748 1749 /* 1750 * Go through with our normalized address and map it to a given chip-select. 1751 * This as a side effect indicates which DIMM we're going out on as well. 
Note, 1752 * the final DIMM can change due to chip-select hashing; however, we use this 1753 * DIMM for determining all of the actual address translations. 1754 */ 1755 static boolean_t 1756 zen_umc_decode_find_cs(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1757 { 1758 const zen_umc_chan_t *chan = dec->dec_umc_chan; 1759 1760 for (uint_t dimmno = 0; dimmno < ZEN_UMC_MAX_DIMMS; dimmno++) { 1761 const umc_dimm_t *dimm = &chan->chan_dimms[dimmno]; 1762 1763 if ((dimm->ud_flags & UMC_DIMM_F_VALID) == 0) 1764 continue; 1765 1766 for (uint_t csno = 0; csno < ZEN_UMC_MAX_CS_PER_DIMM; csno++) { 1767 const umc_cs_t *cs = &dimm->ud_cs[csno]; 1768 boolean_t is_sec = B_FALSE; 1769 1770 if (zen_umc_decoder_cs_matches(cs, dec->dec_norm_addr, 1771 &is_sec)) { 1772 dec->dec_dimm = dimm; 1773 dec->dec_cs = cs; 1774 dec->dec_log_csno = dimmno * ZEN_UMC_MAX_DIMMS + 1775 csno; 1776 dec->dec_cs_sec = is_sec; 1777 return (B_TRUE); 1778 } 1779 } 1780 } 1781 1782 dec->dec_fail = ZEN_UMC_DECODE_F_NO_CS_BASE_MATCH; 1783 return (B_FALSE); 1784 } 1785 1786 /* 1787 * Extract the column from the address. For once, something that is almost 1788 * straightforward. 1789 */ 1790 static boolean_t 1791 zen_umc_decode_cols(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1792 { 1793 uint32_t cols = 0; 1794 const umc_cs_t *cs = dec->dec_cs; 1795 1796 for (uint_t i = 0; i < cs->ucs_ncol; i++) { 1797 uint32_t index; 1798 1799 index = cs->ucs_col_bits[i]; 1800 cols |= bitx64(dec->dec_norm_addr, index, index) << i; 1801 } 1802 1803 dec->dec_dimm_col = cols; 1804 return (B_TRUE); 1805 } 1806 1807 /* 1808 * The row is split into two different regions. There's a low and high value, 1809 * though the high value is only present in DDR4. Unlike the column, where each 1810 * bit is spelled out, each set of row bits are contiguous (low and high are 1811 * independent). 
1812 */ 1813 static boolean_t 1814 zen_umc_decode_rows(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1815 { 1816 uint32_t row = 0; 1817 uint8_t inv; 1818 const umc_cs_t *cs = dec->dec_cs; 1819 const uint_t total_bits = cs->ucs_nrow_lo + cs->ucs_nrow_hi; 1820 const uint_t lo_end = cs->ucs_nrow_lo + cs->ucs_row_low_bit - 1; 1821 1822 row = bitx64(dec->dec_norm_addr, lo_end, cs->ucs_row_low_bit); 1823 if (cs->ucs_nrow_hi > 0) { 1824 const uint_t hi_end = cs->ucs_nrow_hi + cs->ucs_row_hi_bit - 1; 1825 const uint32_t hi = bitx64(dec->dec_norm_addr, hi_end, 1826 cs->ucs_row_hi_bit); 1827 1828 row |= hi << cs->ucs_nrow_lo; 1829 } 1830 1831 if (dec->dec_cs_sec) { 1832 inv = cs->ucs_inv_msbs_sec; 1833 } else { 1834 inv = cs->ucs_inv_msbs; 1835 } 1836 1837 /* 1838 * We need to potentially invert the top two bits of the row address 1839 * based on the low two bits of the inverted register below. Note, inv 1840 * only has two valid bits below. So we shift them into place to perform 1841 * the XOR. See the big theory statement in zen_umc.c for more on why 1842 * this works. 1843 */ 1844 inv = inv << (total_bits - 2); 1845 row = row ^ inv; 1846 1847 dec->dec_dimm_row = row; 1848 return (B_TRUE); 1849 } 1850 1851 /* 1852 * Several of the hash schemes ask us to go through and xor all the bits that 1853 * are in an address to transform it into a single bit. This implements that for 1854 * a uint32_t. This is basically a bitwise XOR reduce. 
1855 */ 1856 static uint8_t 1857 zen_umc_running_xor32(const uint32_t in) 1858 { 1859 uint8_t run = 0; 1860 1861 for (uint_t i = 0; i < sizeof (in) * NBBY; i++) { 1862 run ^= bitx32(in, i, i); 1863 } 1864 1865 return (run); 1866 } 1867 1868 static uint8_t 1869 zen_umc_running_xor64(const uint64_t in) 1870 { 1871 uint8_t run = 0; 1872 1873 for (uint_t i = 0; i < sizeof (in) * NBBY; i++) { 1874 run ^= bitx64(in, i, i); 1875 } 1876 1877 return (run); 1878 } 1879 1880 /* 1881 * Our goal here is to extract the number of banks and bank groups that are 1882 * used, if any. 1883 */ 1884 static boolean_t 1885 zen_umc_decode_banks(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1886 { 1887 uint8_t bank = 0; 1888 const umc_cs_t *cs = dec->dec_cs; 1889 const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash; 1890 1891 /* 1892 * Get an initial bank address bit and then perform any hashing if 1893 * bank hashing is enabled. Note, the memory controller's nbanks is the 1894 * total number of bank and bank group bits, hence why it's used for 1895 * the loop counter. 1896 */ 1897 for (uint_t i = 0; i < cs->ucs_nbanks; i++) { 1898 uint32_t row_hash, col_hash; 1899 uint8_t row_xor, col_xor; 1900 uint_t targ = cs->ucs_bank_bits[i]; 1901 uint8_t val = bitx64(dec->dec_norm_addr, targ, targ); 1902 const umc_bank_hash_t *bank_hash = &hash->uch_bank_hashes[i]; 1903 1904 if ((hash->uch_flags & UMC_CHAN_HASH_F_BANK) == 0 || 1905 !hash->uch_bank_hashes[i].ubh_en) { 1906 bank |= val << i; 1907 continue; 1908 } 1909 1910 /* 1911 * See the big theory statement for more on this. Short form, 1912 * bit-wise AND the row and column, then XOR shenanigans. 
1913 */ 1914 row_hash = dec->dec_dimm_row & bank_hash->ubh_row_xor; 1915 col_hash = dec->dec_dimm_col & bank_hash->ubh_col_xor; 1916 row_xor = zen_umc_running_xor32(row_hash); 1917 col_xor = zen_umc_running_xor32(col_hash); 1918 bank |= (row_xor ^ col_xor ^ val) << i; 1919 } 1920 1921 /* 1922 * The bank and bank group are conjoined in the register and bit 1923 * definitions. Once we've calculated that, extract it. 1924 */ 1925 dec->dec_dimm_bank_group = bitx8(bank, cs->ucs_nbank_groups - 1, 0); 1926 dec->dec_dimm_bank = bitx8(bank, cs->ucs_nbanks, cs->ucs_nbank_groups); 1927 return (B_TRUE); 1928 } 1929 1930 /* 1931 * Extract the sub-channel. If not a DDR5 based device, simply set it to zero 1932 * and return. We can't forget to hash this if required. 1933 */ 1934 static boolean_t 1935 zen_umc_decode_subchan(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1936 { 1937 uint8_t subchan; 1938 uint32_t row_hash, col_hash, bank_hash; 1939 uint8_t row_xor, col_xor, bank_xor; 1940 const umc_cs_t *cs = dec->dec_cs; 1941 const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash; 1942 1943 switch (dec->dec_umc_chan->chan_type) { 1944 case UMC_DIMM_T_DDR5: 1945 case UMC_DIMM_T_LPDDR5: 1946 break; 1947 default: 1948 dec->dec_dimm_subchan = 0; 1949 return (B_TRUE); 1950 } 1951 1952 subchan = bitx64(dec->dec_norm_addr, cs->ucs_subchan, cs->ucs_subchan); 1953 if ((hash->uch_flags & UMC_CHAN_HASH_F_PC) == 0 || 1954 !hash->uch_pc_hash.uph_en) { 1955 dec->dec_dimm_subchan = subchan; 1956 return (B_TRUE); 1957 } 1958 1959 row_hash = dec->dec_dimm_row & hash->uch_pc_hash.uph_row_xor; 1960 col_hash = dec->dec_dimm_col & hash->uch_pc_hash.uph_col_xor; 1961 bank_hash = dec->dec_dimm_bank & hash->uch_pc_hash.uph_bank_xor; 1962 row_xor = zen_umc_running_xor32(row_hash); 1963 col_xor = zen_umc_running_xor32(col_hash); 1964 bank_xor = zen_umc_running_xor32(bank_hash); 1965 1966 dec->dec_dimm_subchan = subchan ^ row_xor ^ col_xor ^ bank_xor; 1967 return (B_TRUE); 1968 } 1969 1970 /* 1971 * 
 * Note that we have normalized the RM bits between the primary and secondary
 * base/mask registers so that way even though the DDR5 controller always uses
 * the same RM selection bits, it works in a uniform way for both DDR4 and DDR5.
 */
static boolean_t
zen_umc_decode_rank_mul(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t rm = 0;
	const umc_cs_t *cs = dec->dec_cs;
	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;

	/*
	 * Each RM bit comes from one normalized address bit, optionally XORed
	 * with a hash of the masked normalized address when RM hashing is on.
	 */
	for (uint_t i = 0; i < cs->ucs_nrm; i++) {
		uint8_t index = cs->ucs_rm_bits[i];
		uint8_t bit = bitx64(dec->dec_norm_addr, index, index);

		if ((hash->uch_flags & UMC_CHAN_HASH_F_RM) != 0 &&
		    hash->uch_rm_hashes[i].uah_en) {
			uint64_t norm_mask = dec->dec_norm_addr &
			    hash->uch_rm_hashes[i].uah_addr_xor;
			uint8_t norm_hash = zen_umc_running_xor64(norm_mask);
			bit = bit ^ norm_hash;
		}

		rm |= bit << i;
	}

	dec->dec_dimm_rm = rm;
	return (B_TRUE);
}

/*
 * Go through and determine the actual chip-select activated. This is subject to
 * hashing. Note, we first constructed a logical chip-select value based on
 * which of the four base/mask registers in the UMC we activated for the
 * channel. That basically seeded the two bit value we start with.
 */
static boolean_t
zen_umc_decode_chipsel(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t csno = 0;
	const umc_cs_t *cs = dec->dec_cs;
	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;

	for (uint_t i = 0; i < ZEN_UMC_MAX_CS_BITS; i++) {
		uint8_t bit = bitx8(dec->dec_log_csno, i, i);
		if ((hash->uch_flags & UMC_CHAN_HASH_F_CS) != 0 &&
		    hash->uch_cs_hashes[i].uah_en) {
			uint64_t mask = dec->dec_norm_addr &
			    hash->uch_cs_hashes[i].uah_addr_xor;
			uint8_t rxor = zen_umc_running_xor64(mask);
			bit = bit ^ rxor;
		}
		csno |= bit << i;
	}

	/*
	 * It is not entirely clear what the circumstances are that we need to
	 * apply the chip-select xor. Right now we always apply it. This only
	 * exists on a few DDR5 SoCs, it seems, and we zero out other cases to
	 * try and have a uniform and reasonable path. This tells us what the
	 * absolute chip-select is in the channel. We record this for debugging
	 * purposes and to derive the DIMM and CS.
	 */
	dec->dec_chan_csno = (csno ^ cs->ucs_cs_xor) & 0x3;

	/*
	 * Now that we actually know which chip-select we're targeting, go back
	 * and actual indicate which DIMM we'll go out to and what chip-select
	 * it is relative to the DIMM. This may have changed out due to CS
	 * hashing. As such we have to now snapshot our final DIMM and
	 * chip-select.
	 */
	dec->dec_dimm_no = dec->dec_chan_csno >> 1;
	dec->dec_dimm_csno = dec->dec_chan_csno % 2;
	return (B_TRUE);
}

/*
 * Initialize the decoder state. We do this by first zeroing it all and then
 * setting various result addresses to the UINTXX_MAX that is appropriate. These
 * work as better sentinel values than zero; however, we always zero the
 * structure to be defensive, cover pointers, etc.
 */
static void
zen_umc_decoder_init(zen_umc_decoder_t *dec)
{
	bzero(dec, sizeof (*dec));

	dec->dec_pa = dec->dec_ilv_pa = UINT64_MAX;
	dec->dec_df_ruleno = UINT32_MAX;
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan =
	    dec->dec_ilv_fabid = dec->dec_log_fabid = dec->dec_remap_comp =
	    dec->dec_targ_fabid = UINT32_MAX;
	dec->dec_umc_ruleno = UINT32_MAX;
	dec->dec_norm_addr = UINT64_MAX;
	dec->dec_dimm_col = dec->dec_dimm_row = UINT32_MAX;
	dec->dec_log_csno = dec->dec_dimm_bank = dec->dec_dimm_bank_group =
	    dec->dec_dimm_subchan = dec->dec_dimm_rm = dec->dec_chan_csno =
	    dec->dec_dimm_no = dec->dec_dimm_csno = UINT8_MAX;
}

boolean_t
zen_umc_decode_pa(const zen_umc_t *umc, const uint64_t pa,
    zen_umc_decoder_t *dec)
{
	zen_umc_decoder_init(dec);
	dec->dec_pa = pa;

	/*
	 * Before we proceed through decoding, the first thing we should try to
	 * do is verify that this is even something that could be DRAM.
	 */
	if (!zen_umc_decode_is_dram(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * The very first thing that we need to do is find a data fabric rule
	 * that corresponds to this memory address. This will be used to
	 * determine which set of rules for interleave and related we actually
	 * should then use.
	 */
	if (!zen_umc_decode_find_df_rule(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * Now that we have a DF rule, we must take a more involved step of
	 * mapping to a given CS, e.g. a specific UMC channel. This will tell us
	 * the socket and die as well. This takes care of all the interleaving
	 * and remapping and produces a target fabric ID.
2104 */ 2105 if (!zen_umc_decode_sysaddr_to_csid(umc, dec)) { 2106 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 2107 return (B_FALSE); 2108 } 2109 2110 /* 2111 * With that target ID known, now actually map this to a corresponding 2112 * UMC. 2113 */ 2114 if (!zen_umc_decode_find_umc_rule(umc, dec)) { 2115 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 2116 return (B_FALSE); 2117 } 2118 2119 /* 2120 * With the target and corresponding rules and offset information, 2121 * actually perform normalization. 2122 */ 2123 if (!zen_umc_decode_sysaddr_to_norm(umc, dec)) { 2124 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 2125 return (B_FALSE); 2126 } 2127 2128 /* 2129 * Finally, we somehow managed to actually construct a normalized 2130 * address. Now we must begin the act of transforming this channel 2131 * address into something that makes sense to address a DIMM. To start 2132 * with determine which logical chip-select, which determines where we 2133 * source all our data to use. 2134 */ 2135 if (!zen_umc_decode_find_cs(umc, dec)) { 2136 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 2137 return (B_FALSE); 2138 } 2139 2140 /* 2141 * Now that we have the logical chip-select matched that we're sourcing 2142 * our data from, the next this is a bit more involved: we need to 2143 * extract the row, column, rank/rank multiplication, bank, and bank 2144 * group out of all this, while taking into account all of our hashes. 2145 * 2146 * To do this, we begin by first calculating the row and column as those 2147 * will be needed to determine some of our other values here. 
2148 */ 2149 if (!zen_umc_decode_rows(umc, dec)) { 2150 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 2151 return (B_FALSE); 2152 } 2153 2154 if (!zen_umc_decode_cols(umc, dec)) { 2155 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 2156 return (B_FALSE); 2157 } 2158 2159 /* 2160 * Now that we have the rows and columns we can go through and determine 2161 * the bank and bank group. This depends on the above. 2162 */ 2163 if (!zen_umc_decode_banks(umc, dec)) { 2164 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 2165 return (B_FALSE); 2166 } 2167 2168 /* 2169 * If we have a DDR5 generation DIMM then we need to consider the 2170 * subchannel. This doesn't exist in DDR4 systems (the function handles 2171 * this reality). Because of potential hashing, this needs to come after 2172 * the row, column, and bank have all been determined. 2173 */ 2174 if (!zen_umc_decode_subchan(umc, dec)) { 2175 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 2176 return (B_FALSE); 2177 } 2178 2179 /* 2180 * Time for the last two pieces here: the actual chip select used and 2181 * then figuring out which rank, taking into account rank 2182 * multiplication. Don't worry, these both have hashing opportunities. 2183 */ 2184 if (!zen_umc_decode_rank_mul(umc, dec)) { 2185 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 2186 return (B_FALSE); 2187 } 2188 2189 if (!zen_umc_decode_chipsel(umc, dec)) { 2190 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 2191 return (B_FALSE); 2192 } 2193 2194 /* 2195 * Somehow, that's it. 2196 */ 2197 return (B_TRUE); 2198 } 2199