1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * AMD Address Translation Library 4 * 5 * denormalize.c : Functions to account for interleaving bits 6 * 7 * Copyright (c) 2023, Advanced Micro Devices, Inc. 8 * All Rights Reserved. 9 * 10 * Author: Yazen Ghannam <Yazen.Ghannam@amd.com> 11 */ 12 13 #include "internal.h" 14 15 /* 16 * Returns the Destination Fabric ID. This is the first (lowest) 17 * COH_ST Fabric ID used within a DRAM Address map. 18 */ 19 static u16 get_dst_fabric_id(struct addr_ctx *ctx) 20 { 21 switch (df_cfg.rev) { 22 case DF2: return FIELD_GET(DF2_DST_FABRIC_ID, ctx->map.limit); 23 case DF3: return FIELD_GET(DF3_DST_FABRIC_ID, ctx->map.limit); 24 case DF3p5: return FIELD_GET(DF3p5_DST_FABRIC_ID, ctx->map.limit); 25 case DF4: return FIELD_GET(DF4_DST_FABRIC_ID, ctx->map.ctl); 26 case DF4p5: return FIELD_GET(DF4p5_DST_FABRIC_ID, ctx->map.ctl); 27 default: 28 atl_debug_on_bad_df_rev(); 29 return 0; 30 } 31 } 32 33 /* 34 * Make a contiguous gap in address for N bits starting at bit P. 35 * 36 * Example: 37 * address bits: [20:0] 38 * # of interleave bits (n): 3 39 * starting interleave bit (p): 8 40 * 41 * expanded address bits: [20+n : n+p][n+p-1 : p][p-1 : 0] 42 * [23 : 11][10 : 8][7 : 0] 43 */ 44 static u64 make_space_for_coh_st_id_at_intlv_bit(struct addr_ctx *ctx) 45 { 46 return expand_bits(ctx->map.intlv_bit_pos, 47 ctx->map.total_intlv_bits, 48 ctx->ret_addr); 49 } 50 51 /* 52 * Make two gaps in address for N bits. 53 * First gap is a single bit at bit P. 54 * Second gap is the remaining N-1 bits at bit 12. 55 * 56 * Example: 57 * address bits: [20:0] 58 * # of interleave bits (n): 3 59 * starting interleave bit (p): 8 60 * 61 * First gap 62 * expanded address bits: [20+1 : p+1][p][p-1 : 0] 63 * [21 : 9][8][7 : 0] 64 * 65 * Second gap uses result from first. 66 * r = n - 1; remaining interleave bits 67 * expanded address bits: [21+r : 12+r][12+r-1: 12][11 : 0] 68 * [23 : 14][13 : 12][11 : 0] 69 */ 70 static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx) 71 { 72 /* Make a single space at the interleave bit. */ 73 u64 denorm_addr = expand_bits(ctx->map.intlv_bit_pos, 1, ctx->ret_addr); 74 75 /* Done if there's only a single interleave bit. */ 76 if (ctx->map.total_intlv_bits <= 1) 77 return denorm_addr; 78 79 /* Make spaces for the remaining interleave bits starting at bit 12. */ 80 return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr); 81 } 82 83 /* 84 * Make space for CS ID at bits [14:8] as follows: 85 * 86 * 8 channels -> bits [10:8] 87 * 16 channels -> bits [11:8] 88 * 32 channels -> bits [14,11:8] 89 * 90 * 1 die -> N/A 91 * 2 dies -> bit [12] 92 * 4 dies -> bits [13:12] 93 */ 94 static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx) 95 { 96 u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan); 97 u64 denorm_addr; 98 99 if (ctx->map.intlv_bit_pos != 8) { 100 pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); 101 return ~0ULL; 102 } 103 104 /* Channel bits. Covers up to 4 bits at [11:8]. */ 105 denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr); 106 107 /* Die bits. Always starts at [12]. */ 108 denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr); 109 110 /* Additional channel bit at [14]. */ 111 if (num_intlv_bits > 4) 112 denorm_addr = expand_bits(14, 1, denorm_addr); 113 114 return denorm_addr; 115 } 116 117 /* 118 * Take the current calculated address and shift enough bits in the middle 119 * to make a gap where the interleave bits will be inserted. 120 */ 121 static u64 make_space_for_coh_st_id(struct addr_ctx *ctx) 122 { 123 switch (ctx->map.intlv_mode) { 124 case NOHASH_2CHAN: 125 case NOHASH_4CHAN: 126 case NOHASH_8CHAN: 127 case NOHASH_16CHAN: 128 case NOHASH_32CHAN: 129 case DF2_2CHAN_HASH: 130 return make_space_for_coh_st_id_at_intlv_bit(ctx); 131 132 case DF3_COD4_2CHAN_HASH: 133 case DF3_COD2_4CHAN_HASH: 134 case DF3_COD1_8CHAN_HASH: 135 case DF4_NPS4_2CHAN_HASH: 136 case DF4_NPS2_4CHAN_HASH: 137 case DF4_NPS1_8CHAN_HASH: 138 case DF4p5_NPS4_2CHAN_1K_HASH: 139 case DF4p5_NPS4_2CHAN_2K_HASH: 140 case DF4p5_NPS2_4CHAN_2K_HASH: 141 case DF4p5_NPS1_8CHAN_2K_HASH: 142 case DF4p5_NPS1_16CHAN_2K_HASH: 143 return make_space_for_coh_st_id_split_2_1(ctx); 144 145 case MI3_HASH_8CHAN: 146 case MI3_HASH_16CHAN: 147 case MI3_HASH_32CHAN: 148 return make_space_for_coh_st_id_mi300(ctx); 149 150 default: 151 atl_debug_on_bad_intlv_mode(ctx); 152 return ~0ULL; 153 } 154 } 155 156 static u16 get_coh_st_id_df2(struct addr_ctx *ctx) 157 { 158 u8 num_socket_intlv_bits = ilog2(ctx->map.num_intlv_sockets); 159 u8 num_die_intlv_bits = ilog2(ctx->map.num_intlv_dies); 160 u8 num_intlv_bits; 161 u16 coh_st_id, mask; 162 163 coh_st_id = ctx->coh_st_fabric_id - get_dst_fabric_id(ctx); 164 165 /* Channel interleave bits */ 166 num_intlv_bits = order_base_2(ctx->map.num_intlv_chan); 167 mask = GENMASK(num_intlv_bits - 1, 0); 168 coh_st_id &= mask; 169 170 /* Die interleave bits */ 171 if (num_die_intlv_bits) { 172 u16 die_bits; 173 174 mask = GENMASK(num_die_intlv_bits - 1, 0); 175 die_bits = ctx->coh_st_fabric_id & df_cfg.die_id_mask; 176 die_bits >>= df_cfg.die_id_shift; 177 178 coh_st_id |= (die_bits & mask) << num_intlv_bits; 179 num_intlv_bits += num_die_intlv_bits; 180 } 181 182 /* Socket interleave bits */ 183 if (num_socket_intlv_bits) { 184 u16 socket_bits; 185 186 mask = GENMASK(num_socket_intlv_bits - 1, 0); 187 socket_bits = ctx->coh_st_fabric_id & df_cfg.socket_id_mask; 188 socket_bits >>= df_cfg.socket_id_shift; 189 190 coh_st_id |= (socket_bits & mask) << num_intlv_bits; 191 } 192 193 return coh_st_id; 194 } 195 196 static u16 get_coh_st_id_df4(struct addr_ctx *ctx) 197 { 198 /* 199 * Start with the original component mask and the number of interleave 200 * bits for the channels in this map. 201 */ 202 u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan); 203 u16 mask = df_cfg.component_id_mask; 204 205 u16 socket_bits; 206 207 /* Set the derived Coherent Station ID to the input Coherent Station Fabric ID. */ 208 u16 coh_st_id = ctx->coh_st_fabric_id & mask; 209 210 /* 211 * Subtract the "base" Destination Fabric ID. 212 * This accounts for systems with disabled Coherent Stations. 213 */ 214 coh_st_id -= get_dst_fabric_id(ctx) & mask; 215 216 /* 217 * Generate and use a new mask based on the number of bits 218 * needed for channel interleaving in this map. 219 */ 220 mask = GENMASK(num_intlv_bits - 1, 0); 221 coh_st_id &= mask; 222 223 /* Done if socket interleaving is not enabled. */ 224 if (ctx->map.num_intlv_sockets <= 1) 225 return coh_st_id; 226 227 /* 228 * Figure out how many bits are needed for the number of 229 * interleaved sockets. And shift the derived Coherent Station ID to account 230 * for these. 231 */ 232 num_intlv_bits = ilog2(ctx->map.num_intlv_sockets); 233 coh_st_id <<= num_intlv_bits; 234 235 /* Generate a new mask for the socket interleaving bits. */ 236 mask = GENMASK(num_intlv_bits - 1, 0); 237 238 /* Get the socket interleave bits from the original Coherent Station Fabric ID. */ 239 socket_bits = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask) >> df_cfg.socket_id_shift; 240 241 /* Apply the appropriate socket bits to the derived Coherent Station ID. */ 242 coh_st_id |= socket_bits & mask; 243 244 return coh_st_id; 245 } 246 247 /* 248 * MI300 hash has: 249 * (C)hannel[3:0] = coh_st_id[3:0] 250 * (S)tack[0] = coh_st_id[4] 251 * (D)ie[1:0] = coh_st_id[6:5] 252 * 253 * Hashed coh_st_id is swizzled so that Stack bit is at the end. 254 * coh_st_id = SDDCCCC 255 */ 256 static u16 get_coh_st_id_mi300(struct addr_ctx *ctx) 257 { 258 u8 channel_bits, die_bits, stack_bit; 259 u16 die_id; 260 261 /* Subtract the "base" Destination Fabric ID. */ 262 ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx); 263 264 die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift; 265 266 channel_bits = FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id); 267 stack_bit = FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6; 268 die_bits = die_id << 4; 269 270 return stack_bit | die_bits | channel_bits; 271 } 272 273 /* 274 * Derive the correct Coherent Station ID that represents the interleave bits 275 * used within the system physical address. This accounts for the 276 * interleave mode, number of interleaved channels/dies/sockets, and 277 * other system/mode-specific bit swizzling. 278 * 279 * Returns: Coherent Station ID on success. 280 * All bits set on error. 281 */ 282 static u16 calculate_coh_st_id(struct addr_ctx *ctx) 283 { 284 switch (ctx->map.intlv_mode) { 285 case NOHASH_2CHAN: 286 case NOHASH_4CHAN: 287 case NOHASH_8CHAN: 288 case NOHASH_16CHAN: 289 case NOHASH_32CHAN: 290 case DF3_COD4_2CHAN_HASH: 291 case DF3_COD2_4CHAN_HASH: 292 case DF3_COD1_8CHAN_HASH: 293 case DF2_2CHAN_HASH: 294 return get_coh_st_id_df2(ctx); 295 296 case DF4_NPS4_2CHAN_HASH: 297 case DF4_NPS2_4CHAN_HASH: 298 case DF4_NPS1_8CHAN_HASH: 299 case DF4p5_NPS4_2CHAN_1K_HASH: 300 case DF4p5_NPS4_2CHAN_2K_HASH: 301 case DF4p5_NPS2_4CHAN_2K_HASH: 302 case DF4p5_NPS1_8CHAN_2K_HASH: 303 case DF4p5_NPS1_16CHAN_2K_HASH: 304 return get_coh_st_id_df4(ctx); 305 306 case MI3_HASH_8CHAN: 307 case MI3_HASH_16CHAN: 308 case MI3_HASH_32CHAN: 309 return get_coh_st_id_mi300(ctx); 310 311 /* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */ 312 case DF4p5_NPS2_4CHAN_1K_HASH: 313 case DF4p5_NPS1_8CHAN_1K_HASH: 314 case DF4p5_NPS1_16CHAN_1K_HASH: 315 return ctx->coh_st_fabric_id - get_dst_fabric_id(ctx); 316 317 default: 318 atl_debug_on_bad_intlv_mode(ctx); 319 return ~0; 320 } 321 } 322 323 static u64 insert_coh_st_id_at_intlv_bit(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) 324 { 325 return denorm_addr | (coh_st_id << ctx->map.intlv_bit_pos); 326 } 327 328 static u64 insert_coh_st_id_split_2_1(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) 329 { 330 /* Insert coh_st_id[0] at the interleave bit. */ 331 denorm_addr |= (coh_st_id & BIT(0)) << ctx->map.intlv_bit_pos; 332 333 /* Insert coh_st_id[2:1] at bit 12. */ 334 denorm_addr |= (coh_st_id & GENMASK(2, 1)) << 11; 335 336 return denorm_addr; 337 } 338 339 static u64 insert_coh_st_id_split_2_2(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) 340 { 341 /* Insert coh_st_id[1:0] at bit 8. */ 342 denorm_addr |= (coh_st_id & GENMASK(1, 0)) << 8; 343 344 /* 345 * Insert coh_st_id[n:2] at bit 12. 'n' could be 2 or 3. 346 * Grab both because bit 3 will be clear if unused. 347 */ 348 denorm_addr |= (coh_st_id & GENMASK(3, 2)) << 10; 349 350 return denorm_addr; 351 } 352 353 static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) 354 { 355 switch (ctx->map.intlv_mode) { 356 case NOHASH_2CHAN: 357 case NOHASH_4CHAN: 358 case NOHASH_8CHAN: 359 case NOHASH_16CHAN: 360 case NOHASH_32CHAN: 361 case MI3_HASH_8CHAN: 362 case MI3_HASH_16CHAN: 363 case MI3_HASH_32CHAN: 364 case DF2_2CHAN_HASH: 365 return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id); 366 367 case DF3_COD4_2CHAN_HASH: 368 case DF3_COD2_4CHAN_HASH: 369 case DF3_COD1_8CHAN_HASH: 370 case DF4_NPS4_2CHAN_HASH: 371 case DF4_NPS2_4CHAN_HASH: 372 case DF4_NPS1_8CHAN_HASH: 373 case DF4p5_NPS4_2CHAN_1K_HASH: 374 case DF4p5_NPS4_2CHAN_2K_HASH: 375 case DF4p5_NPS2_4CHAN_2K_HASH: 376 case DF4p5_NPS1_8CHAN_2K_HASH: 377 case DF4p5_NPS1_16CHAN_2K_HASH: 378 return insert_coh_st_id_split_2_1(ctx, denorm_addr, coh_st_id); 379 380 case DF4p5_NPS2_4CHAN_1K_HASH: 381 case DF4p5_NPS1_8CHAN_1K_HASH: 382 case DF4p5_NPS1_16CHAN_1K_HASH: 383 return insert_coh_st_id_split_2_2(ctx, denorm_addr, coh_st_id); 384 385 default: 386 atl_debug_on_bad_intlv_mode(ctx); 387 return ~0ULL; 388 } 389 } 390 391 /* 392 * MI300 systems have a fixed, hardware-defined physical-to-logical 393 * Coherent Station mapping. The Remap registers are not used. 394 */ 395 static const u16 phy_to_log_coh_st_map_mi300[] = { 396 12, 13, 14, 15, 397 8, 9, 10, 11, 398 4, 5, 6, 7, 399 0, 1, 2, 3, 400 28, 29, 30, 31, 401 24, 25, 26, 27, 402 20, 21, 22, 23, 403 16, 17, 18, 19, 404 }; 405 406 static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx) 407 { 408 if (ctx->inst_id >= ARRAY_SIZE(phy_to_log_coh_st_map_mi300)) { 409 atl_debug(ctx, "Instance ID out of range"); 410 return ~0; 411 } 412 413 return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift); 414 } 415 416 static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) 417 { 418 u16 component_id, log_fabric_id; 419 420 /* Start with the physical COH_ST Fabric ID. */ 421 u16 phys_fabric_id = ctx->coh_st_fabric_id; 422 423 if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) 424 return get_logical_coh_st_fabric_id_mi300(ctx); 425 426 /* Skip logical ID lookup if remapping is disabled. */ 427 if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) && 428 ctx->map.intlv_mode != DF3_6CHAN) 429 return phys_fabric_id; 430 431 /* Mask off the Node ID bits to get the "local" Component ID. */ 432 component_id = phys_fabric_id & df_cfg.component_id_mask; 433 434 /* 435 * Search the list of logical Component IDs for the one that 436 * matches this physical Component ID. 437 */ 438 for (log_fabric_id = 0; log_fabric_id < MAX_COH_ST_CHANNELS; log_fabric_id++) { 439 if (ctx->map.remap_array[log_fabric_id] == component_id) 440 break; 441 } 442 443 if (log_fabric_id == MAX_COH_ST_CHANNELS) 444 atl_debug(ctx, "COH_ST remap entry not found for 0x%x", 445 log_fabric_id); 446 447 /* Get the Node ID bits from the physical and apply to the logical. */ 448 return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id; 449 } 450 451 static u16 get_logical_coh_st_fabric_id_for_current_spa(struct addr_ctx *ctx, 452 struct df4p5_denorm_ctx *denorm_ctx) 453 { 454 bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; 455 bool hash_pa8, hash_pa9, hash_pa12, hash_pa13; 456 u64 cs_id = 0; 457 458 hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); 459 hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); 460 hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); 461 hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); 462 463 hash_pa8 = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa); 464 hash_pa8 ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa); 465 hash_pa8 ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k; 466 hash_pa8 ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M; 467 hash_pa8 ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G; 468 hash_pa8 ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T; 469 470 hash_pa9 = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa); 471 hash_pa9 ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k; 472 hash_pa9 ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M; 473 hash_pa9 ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G; 474 hash_pa9 ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T; 475 476 hash_pa12 = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa); 477 hash_pa12 ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k; 478 hash_pa12 ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M; 479 hash_pa12 ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G; 480 hash_pa12 ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T; 481 482 hash_pa13 = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa); 483 hash_pa13 ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k; 484 hash_pa13 ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M; 485 hash_pa13 ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G; 486 hash_pa13 ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T; 487 488 switch (ctx->map.intlv_mode) { 489 case DF4p5_NPS0_24CHAN_1K_HASH: 490 cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3; 491 cs_id %= denorm_ctx->mod_value; 492 cs_id <<= 2; 493 cs_id |= (hash_pa9 | (hash_pa12 << 1)); 494 cs_id |= hash_pa8 << df_cfg.socket_id_shift; 495 break; 496 497 case DF4p5_NPS0_24CHAN_2K_HASH: 498 cs_id = FIELD_GET(GENMASK_ULL(63, 14), denorm_ctx->current_spa) << 4; 499 cs_id %= denorm_ctx->mod_value; 500 cs_id <<= 2; 501 cs_id |= (hash_pa12 | (hash_pa13 << 1)); 502 cs_id |= hash_pa8 << df_cfg.socket_id_shift; 503 break; 504 505 case DF4p5_NPS1_12CHAN_1K_HASH: 506 cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; 507 cs_id %= denorm_ctx->mod_value; 508 cs_id <<= 2; 509 cs_id |= (hash_pa8 | (hash_pa9 << 1)); 510 break; 511 512 case DF4p5_NPS1_12CHAN_2K_HASH: 513 cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3; 514 cs_id %= denorm_ctx->mod_value; 515 cs_id <<= 2; 516 cs_id |= (hash_pa8 | (hash_pa12 << 1)); 517 break; 518 519 case DF4p5_NPS2_6CHAN_1K_HASH: 520 case DF4p5_NPS1_10CHAN_1K_HASH: 521 cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; 522 cs_id |= (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) << 1); 523 cs_id %= denorm_ctx->mod_value; 524 cs_id <<= 1; 525 cs_id |= hash_pa8; 526 break; 527 528 case DF4p5_NPS2_6CHAN_2K_HASH: 529 case DF4p5_NPS1_10CHAN_2K_HASH: 530 cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; 531 cs_id %= denorm_ctx->mod_value; 532 cs_id <<= 1; 533 cs_id |= hash_pa8; 534 break; 535 536 case DF4p5_NPS4_3CHAN_1K_HASH: 537 case DF4p5_NPS2_5CHAN_1K_HASH: 538 cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; 539 cs_id |= FIELD_GET(GENMASK_ULL(9, 8), denorm_ctx->current_spa); 540 cs_id %= denorm_ctx->mod_value; 541 break; 542 543 case DF4p5_NPS4_3CHAN_2K_HASH: 544 case DF4p5_NPS2_5CHAN_2K_HASH: 545 cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; 546 cs_id |= FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) << 1; 547 cs_id %= denorm_ctx->mod_value; 548 break; 549 550 default: 551 atl_debug_on_bad_intlv_mode(ctx); 552 return 0; 553 } 554 555 if (cs_id > 0xffff) { 556 atl_debug(ctx, "Translation error: Resulting cs_id larger than u16\n"); 557 return 0; 558 } 559 560 return cs_id; 561 } 562 563 static int denorm_addr_common(struct addr_ctx *ctx) 564 { 565 u64 denorm_addr; 566 u16 coh_st_id; 567 568 /* 569 * Convert the original physical COH_ST Fabric ID to a logical value. 570 * This is required for non-power-of-two and other interleaving modes. 571 */ 572 ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx); 573 574 denorm_addr = make_space_for_coh_st_id(ctx); 575 coh_st_id = calculate_coh_st_id(ctx); 576 ctx->ret_addr = insert_coh_st_id(ctx, denorm_addr, coh_st_id); 577 return 0; 578 } 579 580 static int denorm_addr_df3_6chan(struct addr_ctx *ctx) 581 { 582 u16 coh_st_id = ctx->coh_st_fabric_id & df_cfg.component_id_mask; 583 u8 total_intlv_bits = ctx->map.total_intlv_bits; 584 u8 low_bit, intlv_bit = ctx->map.intlv_bit_pos; 585 u64 msb_intlv_bits, temp_addr_a, temp_addr_b; 586 u8 np2_bits = ctx->map.np2_bits; 587 588 if (ctx->map.intlv_mode != DF3_6CHAN) 589 return -EINVAL; 590 591 /* 592 * 'np2_bits' holds the number of bits needed to cover the 593 * amount of memory (rounded up) in this map using 64K chunks. 594 * 595 * Example: 596 * Total memory in map: 6GB 597 * Rounded up to next power-of-2: 8GB 598 * Number of 64K chunks: 0x20000 599 * np2_bits = log2(# of chunks): 17 600 * 601 * Get the two most-significant interleave bits from the 602 * input address based on the following: 603 * 604 * [15 + np2_bits - total_intlv_bits : 14 + np2_bits - total_intlv_bits] 605 */ 606 low_bit = 14 + np2_bits - total_intlv_bits; 607 msb_intlv_bits = ctx->ret_addr >> low_bit; 608 msb_intlv_bits &= 0x3; 609 610 /* 611 * If MSB are 11b, then logical COH_ST ID is 6 or 7. 612 * Need to adjust based on the mod3 result. 613 */ 614 if (msb_intlv_bits == 3) { 615 u8 addr_mod, phys_addr_msb, msb_coh_st_id; 616 617 /* Get the remaining interleave bits from the input address. */ 618 temp_addr_b = GENMASK_ULL(low_bit - 1, intlv_bit) & ctx->ret_addr; 619 temp_addr_b >>= intlv_bit; 620 621 /* Calculate the logical COH_ST offset based on mod3. */ 622 addr_mod = temp_addr_b % 3; 623 624 /* Get COH_ST ID bits [2:1]. */ 625 msb_coh_st_id = (coh_st_id >> 1) & 0x3; 626 627 /* Get the bit that starts the physical address bits. */ 628 phys_addr_msb = (intlv_bit + np2_bits + 1); 629 phys_addr_msb &= BIT(0); 630 phys_addr_msb++; 631 phys_addr_msb *= 3 - addr_mod + msb_coh_st_id; 632 phys_addr_msb %= 3; 633 634 /* Move the physical address MSB to the correct place. */ 635 temp_addr_b |= phys_addr_msb << (low_bit - total_intlv_bits - intlv_bit); 636 637 /* Generate a new COH_ST ID as follows: coh_st_id = [1, 1, coh_st_id[0]] */ 638 coh_st_id &= BIT(0); 639 coh_st_id |= GENMASK(2, 1); 640 } else { 641 temp_addr_b = GENMASK_ULL(63, intlv_bit) & ctx->ret_addr; 642 temp_addr_b >>= intlv_bit; 643 } 644 645 temp_addr_a = GENMASK_ULL(intlv_bit - 1, 0) & ctx->ret_addr; 646 temp_addr_b <<= intlv_bit + total_intlv_bits; 647 648 ctx->ret_addr = temp_addr_a | temp_addr_b; 649 ctx->ret_addr |= coh_st_id << intlv_bit; 650 return 0; 651 } 652 653 static int denorm_addr_df4_np2(struct addr_ctx *ctx) 654 { 655 bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; 656 u16 group, group_offset, log_coh_st_offset; 657 unsigned int mod_value, shift_value; 658 u16 mask = df_cfg.component_id_mask; 659 u64 temp_addr_a, temp_addr_b; 660 bool hash_pa8, hashed_bit; 661 662 switch (ctx->map.intlv_mode) { 663 case DF4_NPS4_3CHAN_HASH: 664 mod_value = 3; 665 shift_value = 13; 666 break; 667 case DF4_NPS2_6CHAN_HASH: 668 mod_value = 3; 669 shift_value = 12; 670 break; 671 case DF4_NPS1_12CHAN_HASH: 672 mod_value = 3; 673 shift_value = 11; 674 break; 675 case DF4_NPS2_5CHAN_HASH: 676 mod_value = 5; 677 shift_value = 13; 678 break; 679 case DF4_NPS1_10CHAN_HASH: 680 mod_value = 5; 681 shift_value = 12; 682 break; 683 default: 684 atl_debug_on_bad_intlv_mode(ctx); 685 return -EINVAL; 686 }; 687 688 if (ctx->map.num_intlv_sockets == 1) { 689 hash_pa8 = BIT_ULL(shift_value) & ctx->ret_addr; 690 temp_addr_a = remove_bits(shift_value, shift_value, ctx->ret_addr); 691 } else { 692 hash_pa8 = ctx->coh_st_fabric_id & df_cfg.socket_id_mask; 693 temp_addr_a = ctx->ret_addr; 694 } 695 696 /* Make a gap for the real bit [8]. */ 697 temp_addr_a = expand_bits(8, 1, temp_addr_a); 698 699 /* Make an additional gap for bits [13:12], as appropriate.*/ 700 if (ctx->map.intlv_mode == DF4_NPS2_6CHAN_HASH || 701 ctx->map.intlv_mode == DF4_NPS1_10CHAN_HASH) { 702 temp_addr_a = expand_bits(13, 1, temp_addr_a); 703 } else if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) { 704 temp_addr_a = expand_bits(12, 2, temp_addr_a); 705 } 706 707 /* Keep bits [13:0]. */ 708 temp_addr_a &= GENMASK_ULL(13, 0); 709 710 /* Get the appropriate high bits. */ 711 shift_value += 1 - ilog2(ctx->map.num_intlv_sockets); 712 temp_addr_b = GENMASK_ULL(63, shift_value) & ctx->ret_addr; 713 temp_addr_b >>= shift_value; 714 temp_addr_b *= mod_value; 715 716 /* 717 * Coherent Stations are divided into groups. 718 * 719 * Multiples of 3 (mod3) are divided into quadrants. 720 * e.g. NP4_3CHAN -> [0, 1, 2] [6, 7, 8] 721 * [3, 4, 5] [9, 10, 11] 722 * 723 * Multiples of 5 (mod5) are divided into sides. 724 * e.g. NP2_5CHAN -> [0, 1, 2, 3, 4] [5, 6, 7, 8, 9] 725 */ 726 727 /* 728 * Calculate the logical offset for the COH_ST within its DRAM Address map. 729 * e.g. if map includes [5, 6, 7, 8, 9] and target instance is '8', then 730 * log_coh_st_offset = 8 - 5 = 3 731 */ 732 log_coh_st_offset = (ctx->coh_st_fabric_id & mask) - (get_dst_fabric_id(ctx) & mask); 733 734 /* 735 * Figure out the group number. 736 * 737 * Following above example, 738 * log_coh_st_offset = 3 739 * mod_value = 5 740 * group = 3 / 5 = 0 741 */ 742 group = log_coh_st_offset / mod_value; 743 744 /* 745 * Figure out the offset within the group. 746 * 747 * Following above example, 748 * log_coh_st_offset = 3 749 * mod_value = 5 750 * group_offset = 3 % 5 = 3 751 */ 752 group_offset = log_coh_st_offset % mod_value; 753 754 /* Adjust group_offset if the hashed bit [8] is set. */ 755 if (hash_pa8) { 756 if (!group_offset) 757 group_offset = mod_value - 1; 758 else 759 group_offset--; 760 } 761 762 /* Add in the group offset to the high bits. */ 763 temp_addr_b += group_offset; 764 765 /* Shift the high bits to the proper starting position. */ 766 temp_addr_b <<= 14; 767 768 /* Combine the high and low bits together. */ 769 ctx->ret_addr = temp_addr_a | temp_addr_b; 770 771 /* Account for hashing here instead of in dehash_address(). */ 772 hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); 773 hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); 774 hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); 775 776 hashed_bit = !!hash_pa8; 777 hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr); 778 hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; 779 hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; 780 hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; 781 782 ctx->ret_addr |= hashed_bit << 8; 783 784 /* Done for 3 and 5 channel. */ 785 if (ctx->map.intlv_mode == DF4_NPS4_3CHAN_HASH || 786 ctx->map.intlv_mode == DF4_NPS2_5CHAN_HASH) 787 return 0; 788 789 /* Select the proper 'group' bit to use for Bit 13. */ 790 if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) 791 hashed_bit = !!(group & BIT(1)); 792 else 793 hashed_bit = group & BIT(0); 794 795 hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; 796 hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; 797 hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; 798 799 ctx->ret_addr |= hashed_bit << 13; 800 801 /* Done for 6 and 10 channel. */ 802 if (ctx->map.intlv_mode != DF4_NPS1_12CHAN_HASH) 803 return 0; 804 805 hashed_bit = group & BIT(0); 806 hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; 807 hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; 808 hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; 809 810 ctx->ret_addr |= hashed_bit << 12; 811 return 0; 812 } 813 814 static u64 normalize_addr_df4p5_np2(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx, 815 u64 addr) 816 { 817 u64 temp_addr_a = 0, temp_addr_b = 0; 818 819 switch (ctx->map.intlv_mode) { 820 case DF4p5_NPS0_24CHAN_1K_HASH: 821 case DF4p5_NPS1_12CHAN_1K_HASH: 822 case DF4p5_NPS2_6CHAN_1K_HASH: 823 case DF4p5_NPS4_3CHAN_1K_HASH: 824 case DF4p5_NPS1_10CHAN_1K_HASH: 825 case DF4p5_NPS2_5CHAN_1K_HASH: 826 temp_addr_a = FIELD_GET(GENMASK_ULL(11, 10), addr) << 8; 827 break; 828 829 case DF4p5_NPS0_24CHAN_2K_HASH: 830 case DF4p5_NPS1_12CHAN_2K_HASH: 831 case DF4p5_NPS2_6CHAN_2K_HASH: 832 case DF4p5_NPS4_3CHAN_2K_HASH: 833 case DF4p5_NPS1_10CHAN_2K_HASH: 834 case DF4p5_NPS2_5CHAN_2K_HASH: 835 temp_addr_a = FIELD_GET(GENMASK_ULL(11, 9), addr) << 8; 836 break; 837 838 default: 839 atl_debug_on_bad_intlv_mode(ctx); 840 return 0; 841 } 842 843 switch (ctx->map.intlv_mode) { 844 case DF4p5_NPS0_24CHAN_1K_HASH: 845 temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value; 846 temp_addr_b <<= 10; 847 break; 848 849 case DF4p5_NPS0_24CHAN_2K_HASH: 850 temp_addr_b = FIELD_GET(GENMASK_ULL(63, 14), addr) / denorm_ctx->mod_value; 851 temp_addr_b <<= 11; 852 break; 853 854 case DF4p5_NPS1_12CHAN_1K_HASH: 855 temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value; 856 temp_addr_b <<= 10; 857 break; 858 859 case DF4p5_NPS1_12CHAN_2K_HASH: 860 temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value; 861 temp_addr_b <<= 11; 862 break; 863 864 case DF4p5_NPS2_6CHAN_1K_HASH: 865 case DF4p5_NPS1_10CHAN_1K_HASH: 866 temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1; 867 temp_addr_b |= FIELD_GET(BIT_ULL(9), addr); 868 temp_addr_b /= denorm_ctx->mod_value; 869 temp_addr_b <<= 10; 870 break; 871 872 case DF4p5_NPS2_6CHAN_2K_HASH: 873 case DF4p5_NPS1_10CHAN_2K_HASH: 874 temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value; 875 temp_addr_b <<= 11; 876 break; 877 878 case DF4p5_NPS4_3CHAN_1K_HASH: 879 case DF4p5_NPS2_5CHAN_1K_HASH: 880 temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 2; 881 temp_addr_b |= FIELD_GET(GENMASK_ULL(9, 8), addr); 882 temp_addr_b /= denorm_ctx->mod_value; 883 temp_addr_b <<= 10; 884 break; 885 886 case DF4p5_NPS4_3CHAN_2K_HASH: 887 case DF4p5_NPS2_5CHAN_2K_HASH: 888 temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1; 889 temp_addr_b |= FIELD_GET(BIT_ULL(8), addr); 890 temp_addr_b /= denorm_ctx->mod_value; 891 temp_addr_b <<= 11; 892 break; 893 894 default: 895 atl_debug_on_bad_intlv_mode(ctx); 896 return 0; 897 } 898 899 return denorm_ctx->base_denorm_addr | temp_addr_a | temp_addr_b; 900 } 901 902 static void recalculate_hashed_bits_df4p5_np2(struct addr_ctx *ctx, 903 struct df4p5_denorm_ctx *denorm_ctx) 904 { 905 bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T, hashed_bit; 906 907 if (!denorm_ctx->rehash_vector) 908 return; 909 910 hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); 911 hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); 912 hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); 913 hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); 914 915 if (denorm_ctx->rehash_vector & BIT_ULL(8)) { 916 hashed_bit = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa); 917 hashed_bit ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa); 918 hashed_bit ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k; 919 hashed_bit ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M; 920 hashed_bit ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G; 921 hashed_bit ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T; 922 923 if (FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) != hashed_bit) 924 denorm_ctx->current_spa ^= BIT_ULL(8); 925 } 926 927 if (denorm_ctx->rehash_vector & BIT_ULL(9)) { 928 hashed_bit = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa); 929 hashed_bit ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k; 930 hashed_bit ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M; 931 hashed_bit ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G; 932 hashed_bit ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T; 933 934 if (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) != hashed_bit) 935 denorm_ctx->current_spa ^= BIT_ULL(9); 936 } 937 938 if (denorm_ctx->rehash_vector & BIT_ULL(12)) { 939 hashed_bit = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa); 940 hashed_bit ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k; 941 hashed_bit ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M; 942 hashed_bit ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G; 943 hashed_bit ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T; 944 945 if (FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa) != hashed_bit) 946 denorm_ctx->current_spa ^= BIT_ULL(12); 947 } 948 949 if (denorm_ctx->rehash_vector & BIT_ULL(13)) { 950 hashed_bit = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa); 951 hashed_bit ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k; 952 hashed_bit ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M; 953 hashed_bit ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G; 954 hashed_bit ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T; 955 956 if (FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa) != hashed_bit) 957 denorm_ctx->current_spa ^= BIT_ULL(13); 958 } 959 } 960 961 static bool match_logical_coh_st_fabric_id(struct addr_ctx *ctx, 962 struct df4p5_denorm_ctx *denorm_ctx) 963 { 964 /* 965 * The logical CS fabric ID of the permutation must be calculated from the 966 * current SPA with the base and with the MMIO hole. 967 */ 968 u16 id = get_logical_coh_st_fabric_id_for_current_spa(ctx, denorm_ctx); 969 970 atl_debug(ctx, "Checking calculated logical coherent station fabric id:\n"); 971 atl_debug(ctx, " calculated fabric id = 0x%x\n", id); 972 atl_debug(ctx, " expected fabric id = 0x%x\n", denorm_ctx->coh_st_fabric_id); 973 974 return denorm_ctx->coh_st_fabric_id == id; 975 } 976 977 static bool match_norm_addr(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx) 978 { 979 u64 addr = remove_base_and_hole(ctx, denorm_ctx->current_spa); 980 981 /* 982 * The normalized address must be calculated with the current SPA without 983 * the base and without the MMIO hole. 984 */ 985 addr = normalize_addr_df4p5_np2(ctx, denorm_ctx, addr); 986 987 atl_debug(ctx, "Checking calculated normalized address:\n"); 988 atl_debug(ctx, " calculated normalized addr = 0x%016llx\n", addr); 989 atl_debug(ctx, " expected normalized addr = 0x%016llx\n", ctx->ret_addr); 990 991 return addr == ctx->ret_addr; 992 } 993 994 static int check_permutations(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx) 995 { 996 u64 test_perm, temp_addr, denorm_addr, num_perms; 997 unsigned int dropped_remainder; 998 999 denorm_ctx->div_addr *= denorm_ctx->mod_value; 1000 1001 /* 1002 * The high order bits of num_permutations represent the permutations 1003 * of the dropped remainder. This will be either 0-3 or 0-5 depending 1004 * on the interleave mode. The low order bits represent the 1005 * permutations of other "lost" bits which will be any combination of 1006 * 1, 2, or 3 bits depending on the interleave mode. 1007 */ 1008 num_perms = denorm_ctx->mod_value << denorm_ctx->perm_shift; 1009 1010 for (test_perm = 0; test_perm < num_perms; test_perm++) { 1011 denorm_addr = denorm_ctx->base_denorm_addr; 1012 dropped_remainder = test_perm >> denorm_ctx->perm_shift; 1013 temp_addr = denorm_ctx->div_addr + dropped_remainder; 1014 1015 switch (ctx->map.intlv_mode) { 1016 case DF4p5_NPS0_24CHAN_2K_HASH: 1017 denorm_addr |= temp_addr << 14; 1018 break; 1019 1020 case DF4p5_NPS0_24CHAN_1K_HASH: 1021 case DF4p5_NPS1_12CHAN_2K_HASH: 1022 denorm_addr |= temp_addr << 13; 1023 break; 1024 1025 case DF4p5_NPS1_12CHAN_1K_HASH: 1026 case DF4p5_NPS2_6CHAN_2K_HASH: 1027 case DF4p5_NPS1_10CHAN_2K_HASH: 1028 denorm_addr |= temp_addr << 12; 1029 break; 1030 1031 case DF4p5_NPS2_6CHAN_1K_HASH: 1032 case DF4p5_NPS1_10CHAN_1K_HASH: 1033 denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 9; 1034 denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12; 1035 break; 1036 1037 case DF4p5_NPS4_3CHAN_1K_HASH: 1038 case DF4p5_NPS2_5CHAN_1K_HASH: 1039 denorm_addr |= FIELD_GET(GENMASK_ULL(1, 0), temp_addr) << 8; 1040 denorm_addr |= FIELD_GET(GENMASK_ULL(63, 2), (temp_addr)) << 12; 1041 break; 1042 1043 case DF4p5_NPS4_3CHAN_2K_HASH: 1044 case DF4p5_NPS2_5CHAN_2K_HASH: 1045 denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 8; 1046 denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12; 1047 break; 1048 1049 default: 1050 atl_debug_on_bad_intlv_mode(ctx); 1051 return -EINVAL; 1052 } 1053 1054 switch (ctx->map.intlv_mode) { 1055 case DF4p5_NPS0_24CHAN_1K_HASH: 1056 denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; 1057 denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9; 1058 denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 12; 1059 break; 1060 1061 case DF4p5_NPS0_24CHAN_2K_HASH: 1062 denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; 1063 denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12; 1064 denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 13; 1065 break; 1066 1067 case DF4p5_NPS1_12CHAN_2K_HASH: 1068 denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; 1069 denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12; 1070 break; 1071 1072 case DF4p5_NPS1_12CHAN_1K_HASH: 1073 case DF4p5_NPS4_3CHAN_1K_HASH: 1074 case DF4p5_NPS2_5CHAN_1K_HASH: 1075 denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; 1076 denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9; 1077 break; 1078 1079 case DF4p5_NPS2_6CHAN_1K_HASH: 1080 case DF4p5_NPS2_6CHAN_2K_HASH: 1081 case DF4p5_NPS4_3CHAN_2K_HASH: 1082 case DF4p5_NPS1_10CHAN_1K_HASH: 1083 case DF4p5_NPS1_10CHAN_2K_HASH: 1084 case DF4p5_NPS2_5CHAN_2K_HASH: 1085 denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; 1086 break; 1087 1088 default: 1089 atl_debug_on_bad_intlv_mode(ctx); 1090 return -EINVAL; 1091 } 1092 1093 denorm_ctx->current_spa = add_base_and_hole(ctx, denorm_addr); 1094 recalculate_hashed_bits_df4p5_np2(ctx, denorm_ctx); 1095 1096 atl_debug(ctx, "Checking potential system physical address 0x%016llx\n", 1097 denorm_ctx->current_spa); 1098 1099 if (!match_logical_coh_st_fabric_id(ctx, denorm_ctx)) 1100 continue; 1101 1102 if (!match_norm_addr(ctx, denorm_ctx)) 1103 continue; 1104 1105 if (denorm_ctx->resolved_spa == INVALID_SPA || 1106 denorm_ctx->current_spa > denorm_ctx->resolved_spa) 1107 denorm_ctx->resolved_spa = denorm_ctx->current_spa; 1108 } 1109 1110 if (denorm_ctx->resolved_spa == INVALID_SPA) { 1111 atl_debug(ctx, "Failed to find valid SPA for normalized address 0x%016llx\n", 1112 ctx->ret_addr); 1113 return -EINVAL; 1114 } 1115 1116 /* Return the resolved SPA without the base, without the MMIO hole */ 1117 ctx->ret_addr = remove_base_and_hole(ctx, denorm_ctx->resolved_spa); 1118 1119 return 0; 1120 } 1121 1122 static int init_df4p5_denorm_ctx(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx) 1123 { 1124 denorm_ctx->current_spa = INVALID_SPA; 1125 denorm_ctx->resolved_spa = INVALID_SPA; 1126 1127 switch (ctx->map.intlv_mode) { 1128 case DF4p5_NPS0_24CHAN_1K_HASH: 1129 denorm_ctx->perm_shift = 3; 1130 denorm_ctx->rehash_vector = BIT(8) | BIT(9) | BIT(12); 1131 break; 1132 1133 case DF4p5_NPS0_24CHAN_2K_HASH: 1134 denorm_ctx->perm_shift = 3; 1135 denorm_ctx->rehash_vector = BIT(8) | BIT(12) | BIT(13); 1136 break; 1137 1138 case DF4p5_NPS1_12CHAN_1K_HASH: 1139 denorm_ctx->perm_shift = 2; 1140 denorm_ctx->rehash_vector = BIT(8); 1141 break; 1142 1143 case DF4p5_NPS1_12CHAN_2K_HASH: 1144 denorm_ctx->perm_shift = 2; 1145 denorm_ctx->rehash_vector = BIT(8) | BIT(12); 1146 break; 1147 1148 case DF4p5_NPS2_6CHAN_1K_HASH: 1149 case DF4p5_NPS2_6CHAN_2K_HASH: 1150 case DF4p5_NPS1_10CHAN_1K_HASH: 1151 case DF4p5_NPS1_10CHAN_2K_HASH: 1152 denorm_ctx->perm_shift = 1; 1153 denorm_ctx->rehash_vector = BIT(8); 1154 break; 1155 1156 case DF4p5_NPS4_3CHAN_1K_HASH: 1157 case DF4p5_NPS2_5CHAN_1K_HASH: 1158 denorm_ctx->perm_shift = 2; 1159 denorm_ctx->rehash_vector = 0; 1160 break; 1161 1162 case DF4p5_NPS4_3CHAN_2K_HASH: 1163 case DF4p5_NPS2_5CHAN_2K_HASH: 1164 denorm_ctx->perm_shift = 1; 1165 denorm_ctx->rehash_vector = 0; 1166 break; 1167 1168 default: 1169 atl_debug_on_bad_intlv_mode(ctx); 1170 return -EINVAL; 1171 } 1172 1173 denorm_ctx->base_denorm_addr = FIELD_GET(GENMASK_ULL(7, 0), ctx->ret_addr); 1174 1175 switch (ctx->map.intlv_mode) { 1176 case DF4p5_NPS0_24CHAN_1K_HASH: 1177 case DF4p5_NPS1_12CHAN_1K_HASH: 1178 case DF4p5_NPS2_6CHAN_1K_HASH: 1179 case DF4p5_NPS4_3CHAN_1K_HASH: 1180 case DF4p5_NPS1_10CHAN_1K_HASH: 1181 case DF4p5_NPS2_5CHAN_1K_HASH: 1182 denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(9, 8), ctx->ret_addr) << 10; 1183 denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 10), ctx->ret_addr); 1184 break; 1185 1186 case DF4p5_NPS0_24CHAN_2K_HASH: 1187 case DF4p5_NPS1_12CHAN_2K_HASH: 1188 case DF4p5_NPS2_6CHAN_2K_HASH: 1189 case DF4p5_NPS4_3CHAN_2K_HASH: 1190 case DF4p5_NPS1_10CHAN_2K_HASH: 1191 case DF4p5_NPS2_5CHAN_2K_HASH: 1192 denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(10, 8), ctx->ret_addr) << 9; 1193 denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 11), ctx->ret_addr); 1194 break; 1195 1196 default: 1197 atl_debug_on_bad_intlv_mode(ctx); 1198 return -EINVAL; 1199 } 1200 1201 if (ctx->map.num_intlv_chan % 3 == 0) 1202 denorm_ctx->mod_value = 3; 1203 else 1204 denorm_ctx->mod_value = 5; 1205 1206 denorm_ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx) - get_dst_fabric_id(ctx); 1207 1208 atl_debug(ctx, "Initialized df4p5_denorm_ctx:"); 1209 atl_debug(ctx, " mod_value = %d", denorm_ctx->mod_value); 1210 atl_debug(ctx, " perm_shift = %d", denorm_ctx->perm_shift); 1211 atl_debug(ctx, " rehash_vector = 0x%x", denorm_ctx->rehash_vector); 1212 atl_debug(ctx, " base_denorm_addr = 0x%016llx", denorm_ctx->base_denorm_addr); 1213 atl_debug(ctx, " div_addr = 0x%016llx", denorm_ctx->div_addr); 1214 atl_debug(ctx, " coh_st_fabric_id = 0x%x", denorm_ctx->coh_st_fabric_id); 1215 1216 return 0; 1217 } 1218 1219 /* 1220 * For DF 4.5, parts of the physical address can be directly pulled from the 1221 * normalized address. The exact bits will differ between interleave modes, but 1222 * using NPS0_24CHAN_1K_HASH as an example, the normalized address consists of 1223 * bits [63:13] (divided by 3), bits [11:10], and bits [7:0] of the system 1224 * physical address. 1225 * 1226 * In this case, there is no way to reconstruct the missing bits (bits 8, 9, 1227 * and 12) from the normalized address. Additionally, when bits [63:13] are 1228 * divided by 3, the remainder is dropped. Determine the proper combination of 1229 * "lost" bits and dropped remainder by iterating through each possible 1230 * permutation of these bits and then normalizing the generated system physical 1231 * addresses. If the normalized address matches the address we are trying to 1232 * translate, then we have found the correct permutation of bits. 1233 */ 1234 static int denorm_addr_df4p5_np2(struct addr_ctx *ctx) 1235 { 1236 struct df4p5_denorm_ctx denorm_ctx; 1237 int ret = 0; 1238 1239 memset(&denorm_ctx, 0, sizeof(denorm_ctx)); 1240 1241 atl_debug(ctx, "Denormalizing DF 4.5 normalized address 0x%016llx", ctx->ret_addr); 1242 1243 ret = init_df4p5_denorm_ctx(ctx, &denorm_ctx); 1244 if (ret) 1245 return ret; 1246 1247 return check_permutations(ctx, &denorm_ctx); 1248 } 1249 1250 int denormalize_address(struct addr_ctx *ctx) 1251 { 1252 switch (ctx->map.intlv_mode) { 1253 case NONE: 1254 return 0; 1255 case DF4_NPS4_3CHAN_HASH: 1256 case DF4_NPS2_6CHAN_HASH: 1257 case DF4_NPS1_12CHAN_HASH: 1258 case DF4_NPS2_5CHAN_HASH: 1259 case DF4_NPS1_10CHAN_HASH: 1260 return denorm_addr_df4_np2(ctx); 1261 case DF4p5_NPS0_24CHAN_1K_HASH: 1262 case DF4p5_NPS4_3CHAN_1K_HASH: 1263 case DF4p5_NPS2_6CHAN_1K_HASH: 1264 case DF4p5_NPS1_12CHAN_1K_HASH: 1265 case DF4p5_NPS2_5CHAN_1K_HASH: 1266 case DF4p5_NPS1_10CHAN_1K_HASH: 1267 case DF4p5_NPS4_3CHAN_2K_HASH: 1268 case DF4p5_NPS2_6CHAN_2K_HASH: 1269 case DF4p5_NPS1_12CHAN_2K_HASH: 1270 case DF4p5_NPS0_24CHAN_2K_HASH: 1271 case DF4p5_NPS2_5CHAN_2K_HASH: 1272 case DF4p5_NPS1_10CHAN_2K_HASH: 1273 return denorm_addr_df4p5_np2(ctx); 1274 case DF3_6CHAN: 1275 return denorm_addr_df3_6chan(ctx); 1276 default: 1277 return denorm_addr_common(ctx); 1278 } 1279 } 1280