1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2023 Oxide Computer Company 14 */ 15 16 /* 17 * Zen UMC Decoding logic. See zen_umc.c for an overview of everything. This 18 * implements shared userland/kernel decoding. 19 */ 20 21 #include "zen_umc.h" 22 23 #ifndef _KERNEL 24 #include <strings.h> 25 #endif 26 27 /* 28 * Address constants. 29 */ 30 #define ZEN_UMC_TOM2_START 0x100000000ULL 31 #define ZEN_UMC_TOM2_RSVD_BEGIN 0xfd00000000ULL 32 #define ZEN_UMC_TOM2_RSVD_END 0x10000000000ULL 33 34 /* 35 * COD based hashing constants. 36 */ 37 #define ZEN_UMC_COD_NBITS 3 38 #define ZEN_UMC_NPS_MOD_NBITS 3 39 40 /* 41 * We want to apply some initial heuristics to determine if a physical address 42 * is DRAM before we proceed because of the MMIO hole and related. The DRAM 43 * ranges can overlap with these system reserved ranges so we have to manually 44 * check these. Effectively this means that we have a few valid ranges: 45 * 46 * o [ 0, TOM ) 47 * o [ 4 GiB, TOM2 ) 48 * 49 * However, the above 4 GiB runs into trouble depending on size. There is a 12 50 * GiB system reserved address region right below 1 TiB. So it really turns 51 * into the following when we have more than 1 TiB of DRAM: 52 * 53 * o [ 0, TOM ) 54 * o [ 4 GiB, 1 TiB - 12 GiB ) 55 * o [ 1 TiB, TOM2 ) 56 * 57 * Note, this does not currently scan MTRRs or MMIO rules for what might be 58 * redirected to MMIO. 
59 */ 60 static boolean_t 61 zen_umc_decode_is_dram(const zen_umc_t *umc, zen_umc_decoder_t *dec) 62 { 63 if (dec->dec_pa < umc->umc_tom) { 64 return (B_TRUE); 65 } 66 67 if (dec->dec_pa >= umc->umc_tom2) { 68 dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM; 69 return (B_FALSE); 70 } 71 72 /* 73 * If the address is in the reserved hole around 1 TiB, do not proceed. 74 */ 75 if (dec->dec_pa >= ZEN_UMC_TOM2_RSVD_BEGIN && 76 dec->dec_pa < ZEN_UMC_TOM2_RSVD_END) { 77 dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM; 78 return (B_FALSE); 79 } 80 81 /* 82 * Now that we've validated we're not in the hole, check to see if we're 83 * actually in a valid region for TOM2. 84 */ 85 if (dec->dec_pa >= ZEN_UMC_TOM2_START && 86 dec->dec_pa < umc->umc_tom2) { 87 return (B_TRUE); 88 } 89 90 /* 91 * At this point we have eliminated all known DRAM regions described by 92 * TOM and TOM2, so we have to conclude that whatever we're looking at 93 * is now not part of DRAM. 94 */ 95 dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM; 96 return (B_FALSE); 97 } 98 99 /* 100 * In our first stop on decoding, we need to go through and take a physical 101 * address and figure out what the corresponding initial DF rule that applies 102 * is. This rule will then be used to figure out which target on the data fabric 103 * we should be going to and what interleaving rules apply. 104 * 105 * Our DRAM rule may reflect that the DRAM hole is active. In this case the 106 * specified range in the rule will be larger than the actual amount of DRAM 107 * present. MMIO accesses take priority over DRAM accesses in the core and 108 * therefore the MMIO portion of the rule is not actually decoded. When trying 109 * to match a rule we do not need to worry about that and can just look whether 110 * our physical address matches a rule. We will take into account whether 111 * hoisting should adjust the address when we translate from a system address to 112 * a normal address (e.g. 
an address in the channel) which will be done in a 113 * subsequent step. If an address is in the hole, that has already been 114 * accounted for. 115 * 116 * While gathering information, we have all the DRAM rules for a given CCM that 117 * corresponds to a CPU core. This allows us to review all DRAM rules in one 118 * place rather than walking through what's been assigned to each UMC instance, 119 * which only has the rules that are directed towards that particular channel 120 * and matter for determining channel offsets. 121 */ 122 static boolean_t 123 zen_umc_decode_find_df_rule(const zen_umc_t *umc, zen_umc_decoder_t *dec) 124 { 125 const zen_umc_df_t *df = &umc->umc_dfs[0]; 126 127 for (uint_t i = 0; i < df->zud_dram_nrules; i++) { 128 const df_dram_rule_t *rule = &df->zud_rules[i]; 129 130 /* 131 * If this rule is not enabled, skip it. 132 */ 133 if ((rule->ddr_flags & DF_DRAM_F_VALID) == 0) 134 continue; 135 136 if (dec->dec_pa >= rule->ddr_base && 137 dec->dec_pa < rule->ddr_limit) { 138 dec->dec_df_ruleno = i; 139 dec->dec_df_rule = rule; 140 dec->dec_df_rulesrc = df; 141 return (B_TRUE); 142 } 143 } 144 145 dec->dec_fail = ZEN_UMC_DECODE_F_NO_DF_RULE; 146 return (B_FALSE); 147 } 148 149 /* 150 * This function takes care of the common logic of adjusting an address by the 151 * base value in the rule and determining if we need to apply the DRAM hole or 152 * not. This function is used in two different places: 153 * 154 * o As part of adjusting the system address to construct the interleave 155 * address for DFv4 and Zen 3 based 6-channel hashing (see 156 * zen_umc_determine_ileave_addr() below). 157 * o As part of adjusting the system address at the beginning of normalization 158 * to a channel address. 159 * 160 * One thing to highlight is that the same adjustment we make in the first case 161 * applies to a subset of things for interleaving; however, it applies to 162 * everything when normalizing. 
163 */ 164 static boolean_t 165 zen_umc_adjust_dram_addr(const zen_umc_t *umc, zen_umc_decoder_t *dec, 166 uint64_t *addrp, zen_umc_decode_failure_t errno) 167 { 168 const uint64_t init_addr = *addrp; 169 const df_dram_rule_t *rule = dec->dec_df_rule; 170 const zen_umc_df_t *df = dec->dec_df_rulesrc; 171 uint64_t mod_addr = init_addr; 172 173 ASSERT3U(init_addr, >=, rule->ddr_base); 174 ASSERT3U(init_addr, <, rule->ddr_limit); 175 mod_addr -= rule->ddr_base; 176 177 /* 178 * Determine if the hole applies to this rule. 179 */ 180 if ((rule->ddr_flags & DF_DRAM_F_HOLE) != 0 && 181 (df->zud_flags & ZEN_UMC_DF_F_HOLE_VALID) != 0 && 182 init_addr >= ZEN_UMC_TOM2_START) { 183 uint64_t hole_size; 184 hole_size = ZEN_UMC_TOM2_START - 185 umc->umc_dfs[0].zud_hole_base; 186 if (mod_addr < hole_size) { 187 dec->dec_fail = errno; 188 dec->dec_fail_data = dec->dec_df_ruleno; 189 return (B_FALSE); 190 } 191 192 mod_addr -= hole_size; 193 } 194 195 *addrp = mod_addr; 196 return (B_TRUE); 197 } 198 199 /* 200 * Take care of constructing the address we need to use for determining the 201 * interleaving target fabric id. See the big theory statement in zen_umc.c for 202 * more on this. 203 */ 204 static boolean_t 205 zen_umc_determine_ileave_addr(const zen_umc_t *umc, zen_umc_decoder_t *dec) 206 { 207 const df_dram_rule_t *rule = dec->dec_df_rule; 208 209 if (umc->umc_df_rev <= DF_REV_3 && 210 rule->ddr_chan_ileave != DF_CHAN_ILEAVE_6CH) { 211 dec->dec_ilv_pa = dec->dec_pa; 212 return (B_TRUE); 213 } 214 215 dec->dec_ilv_pa = dec->dec_pa; 216 if (!zen_umc_adjust_dram_addr(umc, dec, &dec->dec_ilv_pa, 217 ZEN_UMC_DECODE_F_ILEAVE_UNDERFLOW)) { 218 return (B_FALSE); 219 } 220 221 return (B_TRUE); 222 } 223 224 /* 225 * This is a simple interleaving case where we simply extract bits. No hashing 226 * required! Per zen_umc.c, from lowest to highest, we have channel, die, and 227 * then socket bits. 
 */
static boolean_t
zen_umc_decode_ileave_nohash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit, ndie_bit, nsock_bit, addr_bit;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	nsock_bit = rule->ddr_sock_ileave_bits;
	ndie_bit = rule->ddr_die_ileave_bits;
	/* Map the channel interleave mode to its number of address bits. */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
		nchan_bit = 0;
		break;
	case DF_CHAN_ILEAVE_2CH:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_4CH:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_8CH:
		nchan_bit = 3;
		break;
	case DF_CHAN_ILEAVE_16CH:
		nchan_bit = 4;
		break;
	case DF_CHAN_ILEAVE_32CH:
		nchan_bit = 5;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * Zero all of these out in case no bits are dedicated to this purpose.
	 * In those cases, then the value for this is always zero.
	 */
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	addr_bit = rule->ddr_addr_start;
	/* Channel bits are the lowest of the interleave bits. */
	if (nchan_bit > 0) {
		dec->dec_ilv_chan = bitx64(dec->dec_ilv_pa,
		    addr_bit + nchan_bit - 1, addr_bit);
		addr_bit += nchan_bit;
	}

	/* Die bits, if any, sit immediately above the channel bits. */
	if (ndie_bit > 0) {
		dec->dec_ilv_die = bitx64(dec->dec_ilv_pa,
		    addr_bit + ndie_bit - 1, addr_bit);
		addr_bit += ndie_bit;
	}

	/* Socket bits, if any, are the highest of the interleave bits. */
	if (nsock_bit > 0) {
		dec->dec_ilv_sock = bitx64(dec->dec_ilv_pa,
		    addr_bit + nsock_bit - 1, addr_bit);
		addr_bit += nsock_bit;
	}

	return (B_TRUE);
}

/*
 * Perform the Zen 2/Zen 3 "COD" based hashing. See the zen_umc.c interleaving
 * section of the big theory statement for an overview of how this works. COD
 * modes never interleave across sockets or dies, only channels.
 */
static boolean_t
zen_umc_decode_ileave_cod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	/*
	 * The order of bits here is defined by AMD. Yes, we do use the rule's
	 * address bit first and then skip to bit 12 for the second hash bit.
	 */
	const uint32_t addr_bits[3] = { rule->ddr_addr_start, 12, 13 };

	/* COD hashing is incompatible with socket/die interleaving. */
	if (rule->ddr_sock_ileave_bits != 0 || rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_COD_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_COD4_2CH:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_COD2_4CH:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_COD1_8CH:
		nchan_bit = 3;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;

	/*
	 * Proceed to calculate the address hash based on the number of bits
	 * that we have been told to use based on the DF rule. Use the flags in
	 * the rule to determine which additional address ranges to hash in.
	 * Channel bit i XORs together addr_bits[i] and, per the enabled
	 * flags, bits 16+i, 21+i, and 30+i of the interleave address.
	 */
	for (uint_t i = 0; i < nchan_bit; i++) {
		uint8_t hash = 0;

		hash = bitx64(dec->dec_ilv_pa, addr_bits[i], addr_bits[i]);
		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 16 + i, 16 + i);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 21 + i, 21 + i);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 30 + i, 30 + i);
			hash ^= val;
		}

		dec->dec_ilv_chan |= hash << i;
	}

	return (B_TRUE);
}

/*
 * This implements the standard NPS hash for power of 2 based channel
 * configurations that is found in DFv4. For more information, please see the
 * interleaving portion of the zen_umc.c big theory statement.
 */
static boolean_t
zen_umc_decode_ileave_nps(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit, nsock_bit;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	/*
	 * The order of bits here is defined by AMD. Yes, this is start with the
	 * defined address bit and then skip to bit 12.
	 */
	const uint32_t addr_bits[4] = { rule->ddr_addr_start, 12, 13, 14 };

	/* NPS modes may interleave across sockets, but never dies. */
	if (rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	nsock_bit = rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_2CH:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_NPS2_4CH:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_NPS1_8CH:
		nchan_bit = 3;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/* We only have four hash source bits, so the total must fit. */
	ASSERT3U(nchan_bit + nsock_bit, <=, 4);
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;

	for (uint_t i = 0; i < nchan_bit + nsock_bit; i++) {
		uint8_t hash = 0;

		hash = bitx64(dec->dec_ilv_pa, addr_bits[i], addr_bits[i]);
		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 16 + i, 16 + i);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 21 + i, 21 + i);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 30 + i, 30 + i);
			hash ^= val;
		}

		/*
		 * If this is the first bit and we're not doing socket
		 * interleaving, then we need to add bit 14 to the running hash.
		 */
		if (i == 0 && nsock_bit == 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 14, 14);
			hash ^= val;
		}

		/*
		 * If socket interleaving is going on we need to store the first
		 * bit as the socket hash and then redirect the remaining bits
		 * to the channel, taking into account that the shift will be
		 * adjusted as a result.
		 */
		if (nsock_bit > 0) {
			if (i == 0) {
				dec->dec_ilv_sock = hash;
			} else {
				dec->dec_ilv_chan |= hash << (i - 1);
			}
		} else {
			dec->dec_ilv_chan |= hash << i;
		}
	}

	return (B_TRUE);
}

/*
 * This implements the logic to perform the Zen 3 6ch special hash. It's worth
 * calling out that unlike all other hash functions, this does not support the
 * use of the DF_DRAM_F_HASH_16_18 flag. The three resulting hash bits are
 * written to hashes[0..2].
 */
static void
zen_umc_decode_hash_zen3_6ch(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[3])
{
	uint32_t addr_bit = rule->ddr_addr_start;
	/*
	 * Yes, we use these in a weird order. No, there is no 64K.
	 */
	const uint32_t bits_2M[3] = { 23, 21, 22 };
	const uint32_t bits_1G[3] = { 32, 30, 31 };

	hashes[0] = hashes[1] = hashes[2] = 0;
	for (uint_t i = 0; i < ZEN_UMC_COD_NBITS; i++) {
		hashes[i] = bitx64(pa, addr_bit + i, addr_bit + i);
		/* Only the first hash bit additionally folds in bit start+3. */
		if (i == 0) {
			uint8_t val = bitx64(pa, addr_bit + 3, addr_bit + 3);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(pa, bits_2M[i], bits_2M[i]);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(pa, bits_1G[i], bits_1G[i]);
			hashes[i] ^= val;
		}
	}
}

/*
 * Perform Zen 3 6-channel hashing. This is pretty weird compared to others. See
 * the zen_umc.c big theory statement for the thorny details.
 */
static boolean_t
zen_umc_decode_ileave_zen3_6ch(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t hashes[3] = { 0 };
	const df_dram_rule_t *rule = dec->dec_df_rule;
	uint32_t addr_bit = rule->ddr_addr_start;

	/* 6-channel hashing never spans sockets or dies. */
	if (rule->ddr_sock_ileave_bits != 0 || rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_COD_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	zen_umc_decode_hash_zen3_6ch(rule, dec->dec_ilv_pa, hashes);
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	dec->dec_ilv_chan = hashes[0];
	/*
	 * When both upper hash bits are set there is no seventh or eighth
	 * channel to go to; the upper channel bits instead come from the
	 * address modulo 3 (there are 6 = 2 * 3 channels).
	 */
	if (hashes[1] == 1 && hashes[2] == 1) {
		uint64_t mod_addr = dec->dec_ilv_pa >> (addr_bit + 3);
		dec->dec_ilv_chan |= (mod_addr % 3) << 1;
	} else {
		dec->dec_ilv_chan |= hashes[1] << 1;
		dec->dec_ilv_chan |= hashes[2] << 2;
	}

	return (B_TRUE);
}

/*
 * This is the standard hash function for the non-power of two based NPS hashes.
 * See the big theory statement for more information. Unlike the normal NPS hash
 * which uses bit 14 conditionally based on socket interleaving, here it is
 * always used. The three resulting hash bits are written to hashes[0..2].
 */
static void
zen_umc_decode_hash_nps_mod(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[3])
{
	const uint32_t addr_bits[3] = { rule->ddr_addr_start, 12, 13 };

	for (uint_t i = 0; i < ZEN_UMC_NPS_MOD_NBITS; i++) {
		hashes[i] = bitx64(pa, addr_bits[i], addr_bits[i]);
		/* Bit 14 is unconditionally folded into the first hash bit. */
		if (i == 0) {
			uint8_t val = bitx64(pa, 14, 14);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(pa, 16 + i, 16 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(pa, 21 + i, 21 + i);
			hashes[i] ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(pa, 30 + i, 30 + i);
			hashes[i] ^= val;
		}
	}
}

/*
 * See the big theory statement in zen_umc.c which describes the rules for this
 * computation. This is a little less weird than the Zen 3 one, but still,
 * unique.
 */
static boolean_t
zen_umc_decode_ileave_nps_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t hashes[3] = { 0 };
	uint32_t nsock_bit, chan_mod;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	/* NPS modes may interleave across sockets, but never dies. */
	if (rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	nsock_bit = rule->ddr_sock_ileave_bits;
	/* These modes are built around a modulus of 3 or 5 channels. */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
		chan_mod = 3;
		break;
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		chan_mod = 5;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;
	zen_umc_decode_hash_nps_mod(rule, dec->dec_ilv_pa, hashes);

	/* With socket interleaving, the first hash bit selects the socket. */
	if (nsock_bit > 0) {
		ASSERT3U(nsock_bit, ==, 1);
		dec->dec_ilv_sock = hashes[0];
	}

	dec->dec_ilv_chan = bitx64(dec->dec_ilv_pa, 63, 14) % chan_mod;
	if (hashes[0] == 1) {
		dec->dec_ilv_chan = (dec->dec_ilv_chan + 1) % chan_mod;
	}

	/*
	 * Use the remaining hash bits based on the number of channels. There is
	 * nothing else to do for 3/5 channel configs. Note that hashes[2] is
	 * used ahead of hashes[1] here; the normalization code relies on this
	 * ordering.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
		break;
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		dec->dec_ilv_chan += hashes[2] * chan_mod;
		break;
	case DF_CHAN_ILEAVE_NPS1_12CH:
		dec->dec_ilv_chan += ((hashes[2] << 1) | hashes[1]) * chan_mod;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Our next task is to attempt to translate the PA and the DF rule from a system
 * address into a normalized address and a particular DRAM channel that it's
 * targeting. There are several things that we need to take into account here
 * when performing interleaving and translation:
 *
 *  o The DRAM Hole modifying our base address
 *  o The various interleave bits
 *  o Potentially hashing based on channel and global settings
 *  o Potential CS re-targeting registers (only on some systems)
 *  o Finally, the question of how to adjust for the DRAM hole and the base
 *    address changes based on the DF generation and channel configuration. This
 *    influences what address we start interleaving with.
 *
 * Note, this phase does not actually construct the normalized (e.g. channel)
 * address. That's done in a subsequent step. For more background, please see
 * the 'Data Fabric Interleaving' section of the zen_umc.c big theory statement.
 */
static boolean_t
zen_umc_decode_sysaddr_to_csid(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t sock, die, chan, remap_ruleset;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_cs_remap_t *remap;

	/*
	 * First, we must determine what the actual address used for
	 * interleaving is. This varies based on the interleaving and DF
	 * generation.
	 */
	if (!zen_umc_determine_ileave_addr(umc, dec)) {
		return (B_FALSE);
	}

	/*
	 * Dispatch to the decoder that matches the rule's interleave mode;
	 * each fills in dec_ilv_sock/die/chan.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
	case DF_CHAN_ILEAVE_2CH:
	case DF_CHAN_ILEAVE_4CH:
	case DF_CHAN_ILEAVE_8CH:
	case DF_CHAN_ILEAVE_16CH:
	case DF_CHAN_ILEAVE_32CH:
		if (!zen_umc_decode_ileave_nohash(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_COD4_2CH:
	case DF_CHAN_ILEAVE_COD2_4CH:
	case DF_CHAN_ILEAVE_COD1_8CH:
		if (!zen_umc_decode_ileave_cod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_2CH:
	case DF_CHAN_ILEAVE_NPS2_4CH:
	case DF_CHAN_ILEAVE_NPS1_8CH:
		if (!zen_umc_decode_ileave_nps(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_6CH:
		if (!zen_umc_decode_ileave_zen3_6ch(umc, dec)) {
			return (B_FALSE);
		}
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		if (!zen_umc_decode_ileave_nps_mod(umc, dec)) {
			return (B_FALSE);
		}
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * At this point we have dealt with decoding the interleave into the
	 * logical elements that it contains. We need to transform that back
	 * into a fabric ID, so we can add it to the base fabric ID in our rule.
	 * After that, we need to see if there is any CS remapping going on. If
	 * there is, we will replace the component part of the decomposed fabric
	 * ID. With that done, we can then transform the components back into
	 * our target fabric ID, which indicates which UMC we're after.
	 */
	zen_fabric_id_compose(&umc->umc_decomp, dec->dec_ilv_sock,
	    dec->dec_ilv_die, dec->dec_ilv_chan, &dec->dec_ilv_fabid);
	dec->dec_log_fabid = dec->dec_ilv_fabid + rule->ddr_dest_fabid;

	/*
	 * If there's no remapping to do, then we're done. Simply assign the
	 * logical ID as our target.
	 */
	zen_fabric_id_decompose(&umc->umc_decomp, dec->dec_log_fabid, &sock,
	    &die, &chan);
	if ((rule->ddr_flags & DF_DRAM_F_REMAP_EN) == 0) {
		dec->dec_targ_fabid = dec->dec_log_fabid;
		return (B_TRUE);
	}

	/*
	 * The DF contains multiple remapping tables. We must figure out which
	 * of these to actually use. There are two different ways that this can
	 * work. The first way is the one added in DFv4 and is used since then.
	 * In that case, the DRAM rule includes both that remapping was enabled
	 * and which of the multiple mapping tables to use.
	 *
	 * This feature also exists prior to DFv4, but only in Milan. In that
	 * world, indicated by the DF_DRAM_F_REMAP_SOCK flag, there is one table
	 * in each DF per-socket. Based on the destination socket from the data
	 * fabric ID, you pick the actual table to use.
	 *
	 * Once the table has been selected, we maintain the socket and die
	 * portions of the fabric ID as constants and replace the component with
	 * the one the remapping table indicates.
	 *
	 * Technically each DF has its own copy of the remapping tables. To make
	 * this work we rely on the following assumption: a given DF node has to
	 * be able to fully route all DRAM rules to a target. That is, a given
	 * DF node doesn't really forward a system address to the remote die for
	 * further interleave processing and therefore we must have enough
	 * information here to map it totally from the same DF that we got the
	 * CCM rules from in the first place, DF 0.
	 */
	if ((rule->ddr_flags & DF_DRAM_F_REMAP_SOCK) != 0) {
		remap_ruleset = sock;
	} else {
		remap_ruleset = rule->ddr_remap_ent;
	}

	if (remap_ruleset >= dec->dec_df_rulesrc->zud_cs_nremap) {
		dec->dec_fail = ZEN_UMC_DECODE_F_BAD_REMAP_SET;
		dec->dec_fail_data = remap_ruleset;
		return (B_FALSE);
	}

	remap = &dec->dec_df_rulesrc->zud_remap[remap_ruleset];
	if (chan >= remap->csr_nremaps) {
		dec->dec_fail = ZEN_UMC_DECODE_F_BAD_REMAP_ENTRY;
		dec->dec_fail_data = chan;
		return (B_FALSE);
	}

	dec->dec_remap_comp = remap->csr_remaps[chan];
	/* A remapped component must fit within the component mask. */
	if ((dec->dec_remap_comp & ~umc->umc_decomp.dfd_comp_mask) != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_REMAP_HAS_BAD_COMP;
		dec->dec_fail_data = dec->dec_remap_comp;
		return (B_FALSE);
	}

	zen_fabric_id_compose(&umc->umc_decomp, sock, die, dec->dec_remap_comp,
	    &dec->dec_targ_fabid);

	return (B_TRUE);
}

/*
 * Our next step here is to actually take our target ID and find the
 * corresponding DF, UMC, and actual rule that was used. Note, we don't
 * decompose the ID and look things up that way for a few reasons. While each
 * UMC should map linearly to its instance/component ID, there are suggestions
 * that they can be renumbered. This makes it simplest to just walk over
 * everything (and there aren't that many things to walk over either).
789 */ 790 static boolean_t 791 zen_umc_decode_find_umc_rule(const zen_umc_t *umc, zen_umc_decoder_t *dec) 792 { 793 for (uint_t dfno = 0; dfno < umc->umc_ndfs; dfno++) { 794 const zen_umc_df_t *df = &umc->umc_dfs[dfno]; 795 for (uint_t umcno = 0; umcno < df->zud_nchan; umcno++) { 796 const zen_umc_chan_t *chan = &df->zud_chan[umcno]; 797 798 if (chan->chan_fabid != dec->dec_targ_fabid) { 799 continue; 800 } 801 802 /* 803 * At this point we have found the UMC that we were 804 * looking for. Snapshot that and then figure out which 805 * rule index of it corresponds to our mapping so we can 806 * properly determine an offset. We will still use the 807 * primary CCM rule for all other calculations. 808 */ 809 dec->dec_umc_chan = chan; 810 for (uint32_t ruleno = 0; ruleno < chan->chan_nrules; 811 ruleno++) { 812 const df_dram_rule_t *rule = 813 &chan->chan_rules[ruleno]; 814 if ((rule->ddr_flags & DF_DRAM_F_VALID) == 0) { 815 continue; 816 } 817 818 if (dec->dec_pa >= rule->ddr_base && 819 dec->dec_pa < rule->ddr_limit) { 820 dec->dec_umc_ruleno = ruleno; 821 return (B_TRUE); 822 } 823 } 824 825 dec->dec_fail = ZEN_UMC_DECODE_F_UMC_DOESNT_HAVE_PA; 826 return (B_FALSE); 827 } 828 } 829 830 dec->dec_fail = ZEN_UMC_DECODE_F_CANNOT_MAP_FABID; 831 return (B_FALSE); 832 } 833 834 /* 835 * Non-hashing interleave modes system address normalization logic. See the 836 * zen_umc.c big theory statement for more information. 
837 */ 838 static boolean_t 839 zen_umc_decode_normalize_nohash(const zen_umc_t *umc, zen_umc_decoder_t *dec) 840 { 841 uint_t nbits = 0; 842 const df_dram_rule_t *rule = dec->dec_df_rule; 843 844 nbits += rule->ddr_sock_ileave_bits; 845 nbits += rule->ddr_die_ileave_bits; 846 switch (rule->ddr_chan_ileave) { 847 case DF_CHAN_ILEAVE_1CH: 848 break; 849 case DF_CHAN_ILEAVE_2CH: 850 nbits += 1; 851 break; 852 case DF_CHAN_ILEAVE_4CH: 853 nbits += 2; 854 break; 855 case DF_CHAN_ILEAVE_8CH: 856 nbits += 3; 857 break; 858 case DF_CHAN_ILEAVE_16CH: 859 nbits += 4; 860 break; 861 case DF_CHAN_ILEAVE_32CH: 862 nbits += 5; 863 break; 864 default: 865 dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP; 866 dec->dec_fail_data = rule->ddr_chan_ileave; 867 return (B_FALSE); 868 } 869 870 /* 871 * If we have a really simple configuration (e.g. no interleaving at 872 * all), then make sure that we do not actually do anything here. 873 */ 874 if (nbits > 0) { 875 dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, 876 rule->ddr_addr_start + nbits - 1, rule->ddr_addr_start); 877 } 878 879 return (B_TRUE); 880 } 881 882 /* 883 * COD/NPS system address normalization logic. See the zen_umc.c big theory 884 * statement for more information. 885 */ 886 static boolean_t 887 zen_umc_decode_normalize_hash(const zen_umc_t *umc, zen_umc_decoder_t *dec) 888 { 889 uint_t nbits = 0; 890 const df_dram_rule_t *rule = dec->dec_df_rule; 891 892 /* 893 * NPS hashes allow for socket interleaving, COD hashes do not. Add 894 * socket interleaving, skip die. 
895 */ 896 nbits += rule->ddr_sock_ileave_bits; 897 switch (rule->ddr_chan_ileave) { 898 case DF_CHAN_ILEAVE_COD4_2CH: 899 case DF_CHAN_ILEAVE_NPS4_2CH: 900 nbits += 1; 901 break; 902 case DF_CHAN_ILEAVE_COD2_4CH: 903 case DF_CHAN_ILEAVE_NPS2_4CH: 904 nbits += 2; 905 break; 906 case DF_CHAN_ILEAVE_COD1_8CH: 907 case DF_CHAN_ILEAVE_NPS1_8CH: 908 nbits += 3; 909 break; 910 default: 911 dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP; 912 dec->dec_fail_data = rule->ddr_chan_ileave; 913 } 914 915 /* 916 * Always remove high order bits before low order bits so we don't have 917 * to adjust the bits we need to remove. 918 */ 919 if (nbits > 1) { 920 uint_t start = 12; 921 uint_t end = start + (nbits - 2); 922 dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, end, start); 923 } 924 925 dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, rule->ddr_addr_start, 926 rule->ddr_addr_start); 927 return (B_TRUE); 928 } 929 930 /* 931 * Now it's time to perform normalization of our favorite interleaving type. 932 * Please see the comments in zen_umc.c on this to understand what we're doing 933 * here and why. 934 */ 935 static boolean_t 936 zen_umc_decode_normalize_zen3_6ch(const zen_umc_t *umc, zen_umc_decoder_t *dec) 937 { 938 uint8_t hashes[3] = { 0 }; 939 uint_t start, end; 940 const df_dram_rule_t *rule = dec->dec_df_rule; 941 942 /* 943 * As per the theory statement, we always remove the hash bits here from 944 * the starting address. Because this is a 6-channel config, that turns 945 * into 3. Perform the hash again first. 946 */ 947 zen_umc_decode_hash_zen3_6ch(rule, dec->dec_norm_addr, hashes); 948 start = rule->ddr_addr_start; 949 end = rule->ddr_addr_start + ZEN_UMC_COD_NBITS - 1; 950 dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, end, start); 951 952 /* 953 * This is the case the theory statement warned about. This gets 954 * normalized to the top of the DIMM's range (its two upper most bits 955 * are set). 
956 */ 957 if (hashes[1] == 1 && hashes[2] == 1) { 958 uint_t start = 14 - ZEN_UMC_COD_NBITS + 959 dec->dec_umc_chan->chan_np2_space0; 960 dec->dec_norm_addr = bitset64(dec->dec_norm_addr, start + 1, 961 start, 0x3); 962 } 963 964 return (B_TRUE); 965 } 966 967 /* 968 * Based on the algorithm of sorts described in zen_umc.c, we have a few 969 * different phases of extraction and combination. This isn't quite like the 970 * others where we simply delete bits. 971 */ 972 static boolean_t 973 zen_umc_decode_normalize_nps_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec) 974 { 975 uint64_t low, high, mid; 976 uint_t nbits, chan_mod, sock_bits, nmid_bits; 977 uint_t mid_start, mid_end; 978 uint8_t hashes[3] = { 0 }; 979 const df_dram_rule_t *rule = dec->dec_df_rule; 980 981 sock_bits = rule->ddr_sock_ileave_bits; 982 switch (rule->ddr_chan_ileave) { 983 case DF_CHAN_ILEAVE_NPS4_3CH: 984 chan_mod = 3; 985 nbits = 1; 986 break; 987 case DF_CHAN_ILEAVE_NPS2_5CH: 988 chan_mod = 5; 989 nbits = 1; 990 break; 991 case DF_CHAN_ILEAVE_NPS2_6CH: 992 chan_mod = 3; 993 nbits = 2; 994 break; 995 case DF_CHAN_ILEAVE_NPS1_10CH: 996 chan_mod = 5; 997 nbits = 2; 998 break; 999 case DF_CHAN_ILEAVE_NPS1_12CH: 1000 chan_mod = 3; 1001 nbits = 3; 1002 break; 1003 default: 1004 dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP; 1005 dec->dec_fail_data = rule->ddr_chan_ileave; 1006 return (B_FALSE); 1007 } 1008 1009 /* 1010 * First extract the low bit range that we're using which is everything 1011 * below the starting interleave address. We also always extract the 1012 * high bits, which are always [63:14] and divide it by the modulus. 1013 * Note, we apply the hash after any such division if needed. It becomes 1014 * the new least significant bit. 
1015 */ 1016 low = bitx64(dec->dec_norm_addr, rule->ddr_addr_start - 1, 0); 1017 high = bitx64(dec->dec_norm_addr, 63, 14) / chan_mod; 1018 zen_umc_decode_hash_nps_mod(rule, dec->dec_norm_addr, hashes); 1019 if (sock_bits == 0) { 1020 high = (high << 1) | hashes[0]; 1021 } 1022 1023 /* 1024 * Now for the weirdest bit here, extracting the middle bits. Recall 1025 * this hash uses bit 8, then 13, then 12 (the hash order is still 8, 1026 * 12, 13, but it uses the hashes[2] before hashes[1] in 1027 * zen_umc_decode_ileave_nps_mod()). So if we're only using 1 interleave 1028 * bit, we just remove bit 8 (assuming that is our starting address) and 1029 * our range is [13:9]. If we're using two, our range becomes [12:9], 1030 * and if three, [11:9]. The 6 - nbits below comes from the fact that in 1031 * a 1 bit interleave we have 5 bits. Because our mid_start/mid_end 1032 * range is inclusive, we subtract one at the end from mid_end. 1033 */ 1034 nmid_bits = 6 - nbits; 1035 mid_start = rule->ddr_addr_start + 1; 1036 mid_end = mid_start + nmid_bits - 1; 1037 mid = bitx64(dec->dec_norm_addr, mid_end, mid_start); 1038 1039 /* 1040 * Because we've been removing bits, we don't use any of the start and 1041 * ending ranges we calculated above for shifts, as that was what we 1042 * needed from the original address. 1043 */ 1044 dec->dec_norm_addr = low | (mid << rule->ddr_addr_start) | (high << 1045 (rule->ddr_addr_start + nmid_bits)); 1046 1047 return (B_TRUE); 1048 } 1049 1050 /* 1051 * Now we need to go through and try to construct a normalized address using all 1052 * the information that we've gathered to date. To do this we need to take into 1053 * account all of the following transformations on the address that need to 1054 * occur. 
We apply modifications to the address in the following order: 1055 * 1056 * o The base address of the rule 1057 * o DRAM hole changes 1058 * o Normalization of the address due to interleaving (more fun) 1059 * o The DRAM offset register of the rule 1060 */ 1061 static boolean_t 1062 zen_umc_decode_sysaddr_to_norm(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1063 { 1064 const zen_umc_chan_t *chan = dec->dec_umc_chan; 1065 const df_dram_rule_t *rule = dec->dec_df_rule; 1066 1067 dec->dec_norm_addr = dec->dec_pa; 1068 if (!zen_umc_adjust_dram_addr(umc, dec, &dec->dec_norm_addr, 1069 ZEN_UMC_DECODE_F_CALC_NORM_UNDERFLOW)) { 1070 return (B_FALSE); 1071 } 1072 1073 /* 1074 * Now for the most annoying part of this whole thing, normalizing based 1075 * on our actual interleave format. The reason for this is that when 1076 * interleaving is going on, it actually is removing bits that are just 1077 * being used to direct it somewhere; however, it's actually generally 1078 * speaking the same value in each location. See the big theory 1079 * statement in zen_umc.c for more information. 
1080 */ 1081 switch (rule->ddr_chan_ileave) { 1082 case DF_CHAN_ILEAVE_1CH: 1083 case DF_CHAN_ILEAVE_2CH: 1084 case DF_CHAN_ILEAVE_4CH: 1085 case DF_CHAN_ILEAVE_8CH: 1086 case DF_CHAN_ILEAVE_16CH: 1087 case DF_CHAN_ILEAVE_32CH: 1088 if (!zen_umc_decode_normalize_nohash(umc, dec)) { 1089 return (B_FALSE); 1090 } 1091 break; 1092 case DF_CHAN_ILEAVE_COD4_2CH: 1093 case DF_CHAN_ILEAVE_COD2_4CH: 1094 case DF_CHAN_ILEAVE_COD1_8CH: 1095 case DF_CHAN_ILEAVE_NPS4_2CH: 1096 case DF_CHAN_ILEAVE_NPS2_4CH: 1097 case DF_CHAN_ILEAVE_NPS1_8CH: 1098 if (!zen_umc_decode_normalize_hash(umc, dec)) { 1099 return (B_FALSE); 1100 } 1101 break; 1102 case DF_CHAN_ILEAVE_6CH: 1103 if (!zen_umc_decode_normalize_zen3_6ch(umc, dec)) { 1104 return (B_FALSE); 1105 } 1106 break; 1107 case DF_CHAN_ILEAVE_NPS4_3CH: 1108 case DF_CHAN_ILEAVE_NPS2_6CH: 1109 case DF_CHAN_ILEAVE_NPS1_12CH: 1110 case DF_CHAN_ILEAVE_NPS2_5CH: 1111 case DF_CHAN_ILEAVE_NPS1_10CH: 1112 if (!zen_umc_decode_normalize_nps_mod(umc, dec)) { 1113 return (B_FALSE); 1114 } 1115 break; 1116 default: 1117 dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP; 1118 dec->dec_fail_data = rule->ddr_chan_ileave; 1119 return (B_FALSE); 1120 } 1121 1122 /* 1123 * Determine if this rule has an offset to apply. Note, there is never 1124 * an offset for rule 0, hence the index into this is one less than the 1125 * actual rule number. Unlike other transformations these offsets 1126 * describe the start of a normalized range. Therefore we need to 1127 * actually add this value instead of subtract. 1128 */ 1129 if (dec->dec_umc_ruleno > 0) { 1130 uint32_t offno = dec->dec_umc_ruleno - 1; 1131 const chan_offset_t *offset = &chan->chan_offsets[offno]; 1132 1133 if (offset->cho_valid) { 1134 dec->dec_norm_addr += offset->cho_offset; 1135 } 1136 } 1137 1138 return (B_TRUE); 1139 } 1140 1141 /* 1142 * This applies the formula that determines a chip-select actually matches which 1143 * is defined as (address & ~mask) == (base & ~mask) in the PPR. 
There is both a 1144 * primary and secondary mask here. We need to pay attention to which is used 1145 * (if any) for later on. 1146 */ 1147 static boolean_t 1148 zen_umc_decoder_cs_matches(const umc_cs_t *cs, const uint64_t norm, 1149 boolean_t *matched_sec) 1150 { 1151 if (cs->ucs_base.udb_valid != 0) { 1152 uint64_t imask = ~cs->ucs_base_mask; 1153 if ((norm & imask) == (cs->ucs_base.udb_base & imask)) { 1154 *matched_sec = B_FALSE; 1155 return (B_TRUE); 1156 } 1157 } 1158 1159 if (cs->ucs_sec.udb_valid != 0) { 1160 uint64_t imask = ~cs->ucs_sec_mask; 1161 if ((norm & imask) == (cs->ucs_sec.udb_base & imask)) { 1162 *matched_sec = B_TRUE; 1163 return (B_TRUE); 1164 } 1165 } 1166 1167 return (B_FALSE); 1168 } 1169 1170 /* 1171 * Go through with our normalized address and map it to a given chip-select. 1172 * This as a side effect indicates which DIMM we're going out on as well. Note, 1173 * the final DIMM can change due to chip-select hashing; however, we use this 1174 * DIMM for determining all of the actual address translations. 1175 */ 1176 static boolean_t 1177 zen_umc_decode_find_cs(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1178 { 1179 const zen_umc_chan_t *chan = dec->dec_umc_chan; 1180 1181 for (uint_t dimmno = 0; dimmno < ZEN_UMC_MAX_DIMMS; dimmno++) { 1182 const umc_dimm_t *dimm = &chan->chan_dimms[dimmno]; 1183 1184 if ((dimm->ud_flags & UMC_DIMM_F_VALID) == 0) 1185 continue; 1186 1187 for (uint_t csno = 0; csno < ZEN_UMC_MAX_CS_PER_DIMM; csno++) { 1188 const umc_cs_t *cs = &dimm->ud_cs[csno]; 1189 boolean_t is_sec = B_FALSE; 1190 1191 if (zen_umc_decoder_cs_matches(cs, dec->dec_norm_addr, 1192 &is_sec)) { 1193 dec->dec_dimm = dimm; 1194 dec->dec_cs = cs; 1195 dec->dec_log_csno = dimmno * ZEN_UMC_MAX_DIMMS + 1196 csno; 1197 dec->dec_cs_sec = is_sec; 1198 return (B_TRUE); 1199 } 1200 } 1201 } 1202 1203 dec->dec_fail = ZEN_UMC_DECODE_F_NO_CS_BASE_MATCH; 1204 return (B_FALSE); 1205 } 1206 1207 /* 1208 * Extract the column from the address. 
For once, something that is almost 1209 * straightforward. 1210 */ 1211 static boolean_t 1212 zen_umc_decode_cols(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1213 { 1214 uint32_t cols = 0; 1215 const umc_cs_t *cs = dec->dec_cs; 1216 1217 for (uint_t i = 0; i < cs->ucs_ncol; i++) { 1218 uint32_t index; 1219 1220 index = cs->ucs_col_bits[i]; 1221 cols |= bitx64(dec->dec_norm_addr, index, index) << i; 1222 } 1223 1224 dec->dec_dimm_col = cols; 1225 return (B_TRUE); 1226 } 1227 1228 /* 1229 * The row is split into two different regions. There's a low and high value, 1230 * though the high value is only present in DDR4. Unlike the column, where each 1231 * bit is spelled out, each set of row bits are contiguous (low and high are 1232 * independent). 1233 */ 1234 static boolean_t 1235 zen_umc_decode_rows(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1236 { 1237 uint32_t row = 0; 1238 uint8_t inv; 1239 const umc_cs_t *cs = dec->dec_cs; 1240 const uint_t total_bits = cs->ucs_nrow_lo + cs->ucs_nrow_hi; 1241 const uint_t lo_end = cs->ucs_nrow_lo + cs->ucs_row_low_bit - 1; 1242 1243 row = bitx64(dec->dec_norm_addr, lo_end, cs->ucs_row_low_bit); 1244 if (cs->ucs_nrow_hi > 0) { 1245 const uint_t hi_end = cs->ucs_nrow_hi + cs->ucs_row_hi_bit - 1; 1246 const uint32_t hi = bitx64(dec->dec_norm_addr, hi_end, 1247 cs->ucs_row_hi_bit); 1248 1249 row |= hi << cs->ucs_nrow_lo; 1250 } 1251 1252 if (dec->dec_cs_sec) { 1253 inv = cs->ucs_inv_msbs_sec; 1254 } else { 1255 inv = cs->ucs_inv_msbs; 1256 } 1257 1258 /* 1259 * We need to potentially invert the top two bits of the row address 1260 * based on the low two bits of the inverted register below. Note, inv 1261 * only has two valid bits below. So we shift them into place to perform 1262 * the XOR. See the big theory statement in zen_umc.c for more on why 1263 * this works. 
1264 */ 1265 inv = inv << (total_bits - 2); 1266 row = row ^ inv; 1267 1268 dec->dec_dimm_row = row; 1269 return (B_TRUE); 1270 } 1271 1272 /* 1273 * Several of the hash schemes ask us to go through and xor all the bits that 1274 * are in an address to transform it into a single bit. This implements that for 1275 * a uint32_t. This is basically a bitwise XOR reduce. 1276 */ 1277 static uint8_t 1278 zen_umc_running_xor32(const uint32_t in) 1279 { 1280 uint8_t run = 0; 1281 1282 for (uint_t i = 0; i < sizeof (in) * NBBY; i++) { 1283 run ^= bitx32(in, i, i); 1284 } 1285 1286 return (run); 1287 } 1288 1289 static uint8_t 1290 zen_umc_running_xor64(const uint64_t in) 1291 { 1292 uint8_t run = 0; 1293 1294 for (uint_t i = 0; i < sizeof (in) * NBBY; i++) { 1295 run ^= bitx64(in, i, i); 1296 } 1297 1298 return (run); 1299 } 1300 1301 /* 1302 * Our goal here is to extract the number of banks and bank groups that are 1303 * used, if any. 1304 */ 1305 static boolean_t 1306 zen_umc_decode_banks(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1307 { 1308 uint8_t bank = 0; 1309 const umc_cs_t *cs = dec->dec_cs; 1310 const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash; 1311 1312 /* 1313 * Get an initial bank address bit and then perform any hashing if 1314 * bank hashing is enabled. Note, the memory controller's nbanks is the 1315 * total number of bank and bank group bits, hence why it's used for 1316 * the loop counter. 1317 */ 1318 for (uint_t i = 0; i < cs->ucs_nbanks; i++) { 1319 uint32_t row_hash, col_hash; 1320 uint8_t row_xor, col_xor; 1321 uint_t targ = cs->ucs_bank_bits[i]; 1322 uint8_t val = bitx64(dec->dec_norm_addr, targ, targ); 1323 const umc_bank_hash_t *bank_hash = &hash->uch_bank_hashes[i]; 1324 1325 if ((hash->uch_flags & UMC_CHAN_HASH_F_BANK) == 0 || 1326 !hash->uch_bank_hashes[i].ubh_en) { 1327 bank |= val << i; 1328 continue; 1329 } 1330 1331 /* 1332 * See the big theory statement for more on this. 
Short form, 1333 * bit-wise AND the row and column, then XOR shenanigans. 1334 */ 1335 row_hash = dec->dec_dimm_row & bank_hash->ubh_row_xor; 1336 col_hash = dec->dec_dimm_col & bank_hash->ubh_col_xor; 1337 row_xor = zen_umc_running_xor32(row_hash); 1338 col_xor = zen_umc_running_xor32(col_hash); 1339 bank |= (row_xor ^ col_xor ^ val) << i; 1340 } 1341 1342 /* 1343 * The bank and bank group are conjoined in the register and bit 1344 * definitions. Once we've calculated that, extract it. 1345 */ 1346 dec->dec_dimm_bank_group = bitx8(bank, cs->ucs_nbank_groups - 1, 0); 1347 dec->dec_dimm_bank = bitx8(bank, cs->ucs_nbanks, cs->ucs_nbank_groups); 1348 return (B_TRUE); 1349 } 1350 1351 /* 1352 * Extract the sub-channel. If not a DDR5 based device, simply set it to zero 1353 * and return. We can't forget to hash this if required. 1354 */ 1355 static boolean_t 1356 zen_umc_decode_subchan(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1357 { 1358 uint8_t subchan; 1359 uint32_t row_hash, col_hash, bank_hash; 1360 uint8_t row_xor, col_xor, bank_xor; 1361 const umc_cs_t *cs = dec->dec_cs; 1362 const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash; 1363 1364 switch (dec->dec_umc_chan->chan_type) { 1365 case UMC_DIMM_T_DDR5: 1366 case UMC_DIMM_T_LPDDR5: 1367 break; 1368 default: 1369 dec->dec_dimm_subchan = 0; 1370 return (B_TRUE); 1371 } 1372 1373 subchan = bitx64(dec->dec_norm_addr, cs->ucs_subchan, cs->ucs_subchan); 1374 if ((hash->uch_flags & UMC_CHAN_HASH_F_PC) == 0 || 1375 !hash->uch_pc_hash.uph_en) { 1376 dec->dec_dimm_subchan = subchan; 1377 return (B_TRUE); 1378 } 1379 1380 row_hash = dec->dec_dimm_row & hash->uch_pc_hash.uph_row_xor; 1381 col_hash = dec->dec_dimm_col & hash->uch_pc_hash.uph_col_xor; 1382 bank_hash = dec->dec_dimm_bank & hash->uch_pc_hash.uph_bank_xor; 1383 row_xor = zen_umc_running_xor32(row_hash); 1384 col_xor = zen_umc_running_xor32(col_hash); 1385 bank_xor = zen_umc_running_xor32(bank_hash); 1386 1387 dec->dec_dimm_subchan = subchan ^ row_xor 
^ col_xor ^ bank_xor; 1388 return (B_TRUE); 1389 } 1390 1391 /* 1392 * Note that we have normalized the RM bits between the primary and secondary 1393 * base/mask registers so that way even though the DDR5 controller always uses 1394 * the same RM selection bits, it works in a uniform way for both DDR4 and DDR5. 1395 */ 1396 static boolean_t 1397 zen_umc_decode_rank_mul(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1398 { 1399 uint8_t rm = 0; 1400 const umc_cs_t *cs = dec->dec_cs; 1401 const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash; 1402 1403 for (uint_t i = 0; i < cs->ucs_nrm; i++) { 1404 uint8_t index = cs->ucs_rm_bits[i]; 1405 uint8_t bit = bitx64(dec->dec_norm_addr, index, index); 1406 1407 if ((hash->uch_flags & UMC_CHAN_HASH_F_RM) != 0 && 1408 hash->uch_rm_hashes[i].uah_en) { 1409 uint64_t norm_mask = dec->dec_norm_addr & 1410 hash->uch_rm_hashes[i].uah_addr_xor; 1411 uint8_t norm_hash = zen_umc_running_xor64(norm_mask); 1412 bit = bit ^ norm_hash; 1413 } 1414 1415 rm |= bit << i; 1416 } 1417 1418 dec->dec_dimm_rm = rm; 1419 return (B_TRUE); 1420 } 1421 1422 /* 1423 * Go through and determine the actual chip-select activated. This is subject to 1424 * hashing. Note, we first constructed a logical chip-select value based on 1425 * which of the four base/mask registers in the UMC we activated for the 1426 * channel. That basically seeded the two bit value we start with. 
1427 */ 1428 static boolean_t 1429 zen_umc_decode_chipsel(const zen_umc_t *umc, zen_umc_decoder_t *dec) 1430 { 1431 uint8_t csno = 0; 1432 const umc_cs_t *cs = dec->dec_cs; 1433 const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash; 1434 1435 for (uint_t i = 0; i < ZEN_UMC_MAX_CS_BITS; i++) { 1436 uint8_t bit = bitx8(dec->dec_log_csno, i, i); 1437 if ((hash->uch_flags & UMC_CHAN_HASH_F_CS) != 0 && 1438 hash->uch_cs_hashes[i].uah_en) { 1439 uint64_t mask = dec->dec_norm_addr & 1440 hash->uch_cs_hashes[i].uah_addr_xor; 1441 uint8_t rxor = zen_umc_running_xor64(mask); 1442 bit = bit ^ rxor; 1443 } 1444 csno |= bit << i; 1445 } 1446 1447 /* 1448 * It is not entirely clear what the circumstances are that we need to 1449 * apply the chip-select xor. Right now we always apply it. This only 1450 * exists on a few DDR5 SoCs, it seems, and we zero out other cases to 1451 * try and have a uniform and reasonable path. This tells us what the 1452 * absolute chip-select is in the channel. We record this for debugging 1453 * purposes and to derive the DIMM and CS. 1454 */ 1455 dec->dec_chan_csno = (csno ^ cs->ucs_cs_xor) & 0x3; 1456 1457 /* 1458 * Now that we actually know which chip-select we're targeting, go back 1459 * and actual indicate which DIMM we'll go out to and what chip-select 1460 * it is relative to the DIMM. This may have changed out due to CS 1461 * hashing. As such we have to now snapshot our final DIMM and 1462 * chip-select. 1463 */ 1464 dec->dec_dimm_no = dec->dec_chan_csno >> 1; 1465 dec->dec_dimm_csno = dec->dec_chan_csno % 2; 1466 return (B_TRUE); 1467 } 1468 1469 /* 1470 * Initialize the decoder state. We do this by first zeroing it all and then 1471 * setting various result addresses to the UINTXX_MAX that is appropriate. These 1472 * work as better sentinel values than zero; however, we always zero the 1473 * structure to be defensive, cover pointers, etc. 
1474 */ 1475 static void 1476 zen_umc_decoder_init(zen_umc_decoder_t *dec) 1477 { 1478 bzero(dec, sizeof (*dec)); 1479 1480 dec->dec_pa = dec->dec_ilv_pa = UINT64_MAX; 1481 dec->dec_df_ruleno = UINT32_MAX; 1482 dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 1483 dec->dec_ilv_fabid = dec->dec_log_fabid = dec->dec_remap_comp = 1484 dec->dec_targ_fabid = UINT32_MAX; 1485 dec->dec_umc_ruleno = UINT32_MAX; 1486 dec->dec_norm_addr = UINT64_MAX; 1487 dec->dec_dimm_col = dec->dec_dimm_row = UINT32_MAX; 1488 dec->dec_log_csno = dec->dec_dimm_bank = dec->dec_dimm_bank_group = 1489 dec->dec_dimm_subchan = dec->dec_dimm_rm = dec->dec_chan_csno = 1490 dec->dec_dimm_no = dec->dec_dimm_csno = UINT8_MAX; 1491 } 1492 1493 boolean_t 1494 zen_umc_decode_pa(const zen_umc_t *umc, const uint64_t pa, 1495 zen_umc_decoder_t *dec) 1496 { 1497 zen_umc_decoder_init(dec); 1498 dec->dec_pa = pa; 1499 1500 /* 1501 * Before we proceed through decoding, the first thing we should try to 1502 * do is verify that this is even something that could be DRAM. 1503 */ 1504 if (!zen_umc_decode_is_dram(umc, dec)) { 1505 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 1506 return (B_FALSE); 1507 } 1508 1509 /* 1510 * The very first thing that we need to do is find a data fabric rule 1511 * that corresponds to this memory address. This will be used to 1512 * determine which set of rules for interleave and related we actually 1513 * should then use. 1514 */ 1515 if (!zen_umc_decode_find_df_rule(umc, dec)) { 1516 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 1517 return (B_FALSE); 1518 } 1519 1520 /* 1521 * Now that we have a DF rule, we must take a more involved step of 1522 * mapping to a given CS, e.g. a specific UMC channel. This will tell us 1523 * the socket and die as well. This takes care of all the interleaving 1524 * and remapping and produces a target fabric ID. 
1525 */ 1526 if (!zen_umc_decode_sysaddr_to_csid(umc, dec)) { 1527 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 1528 return (B_FALSE); 1529 } 1530 1531 /* 1532 * With that target ID known, now actually map this to a corresponding 1533 * UMC. 1534 */ 1535 if (!zen_umc_decode_find_umc_rule(umc, dec)) { 1536 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 1537 return (B_FALSE); 1538 } 1539 1540 /* 1541 * With the target and corresponding rules and offset information, 1542 * actually perform normalization. 1543 */ 1544 if (!zen_umc_decode_sysaddr_to_norm(umc, dec)) { 1545 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 1546 return (B_FALSE); 1547 } 1548 1549 /* 1550 * Finally, we somehow managed to actually construct a normalized 1551 * address. Now we must begin the act of transforming this channel 1552 * address into something that makes sense to address a DIMM. To start 1553 * with determine which logical chip-select, which determines where we 1554 * source all our data to use. 1555 */ 1556 if (!zen_umc_decode_find_cs(umc, dec)) { 1557 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 1558 return (B_FALSE); 1559 } 1560 1561 /* 1562 * Now that we have the logical chip-select matched that we're sourcing 1563 * our data from, the next this is a bit more involved: we need to 1564 * extract the row, column, rank/rank multiplication, bank, and bank 1565 * group out of all this, while taking into account all of our hashes. 1566 * 1567 * To do this, we begin by first calculating the row and column as those 1568 * will be needed to determine some of our other values here. 
1569 */ 1570 if (!zen_umc_decode_rows(umc, dec)) { 1571 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 1572 return (B_FALSE); 1573 } 1574 1575 if (!zen_umc_decode_cols(umc, dec)) { 1576 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 1577 return (B_FALSE); 1578 } 1579 1580 /* 1581 * Now that we have the rows and columns we can go through and determine 1582 * the bank and bank group. This depends on the above. 1583 */ 1584 if (!zen_umc_decode_banks(umc, dec)) { 1585 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 1586 return (B_FALSE); 1587 } 1588 1589 /* 1590 * If we have a DDR5 generation DIMM then we need to consider the 1591 * subchannel. This doesn't exist in DDR4 systems (the function handles 1592 * this reality). Because of potential hashing, this needs to come after 1593 * the row, column, and bank have all been determined. 1594 */ 1595 if (!zen_umc_decode_subchan(umc, dec)) { 1596 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 1597 return (B_FALSE); 1598 } 1599 1600 /* 1601 * Time for the last two pieces here: the actual chip select used and 1602 * then figuring out which rank, taking into account rank 1603 * multiplication. Don't worry, these both have hashing opportunities. 1604 */ 1605 if (!zen_umc_decode_rank_mul(umc, dec)) { 1606 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 1607 return (B_FALSE); 1608 } 1609 1610 if (!zen_umc_decode_chipsel(umc, dec)) { 1611 ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE); 1612 return (B_FALSE); 1613 } 1614 1615 /* 1616 * Somehow, that's it. 1617 */ 1618 return (B_TRUE); 1619 } 1620