1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * AMD Address Translation Library
4 *
5 * denormalize.c : Functions to account for interleaving bits
6 *
7 * Copyright (c) 2023, Advanced Micro Devices, Inc.
8 * All Rights Reserved.
9 *
10 * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
11 */
12
13 #include "internal.h"
14
15 /*
16 * Returns the Destination Fabric ID. This is the first (lowest)
17 * COH_ST Fabric ID used within a DRAM Address map.
18 */
get_dst_fabric_id(struct addr_ctx * ctx)19 static u16 get_dst_fabric_id(struct addr_ctx *ctx)
20 {
21 switch (df_cfg.rev) {
22 case DF2: return FIELD_GET(DF2_DST_FABRIC_ID, ctx->map.limit);
23 case DF3: return FIELD_GET(DF3_DST_FABRIC_ID, ctx->map.limit);
24 case DF3p5: return FIELD_GET(DF3p5_DST_FABRIC_ID, ctx->map.limit);
25 case DF4: return FIELD_GET(DF4_DST_FABRIC_ID, ctx->map.ctl);
26 case DF4p5: return FIELD_GET(DF4p5_DST_FABRIC_ID, ctx->map.ctl);
27 default:
28 atl_debug_on_bad_df_rev();
29 return 0;
30 }
31 }
32
33 /*
34 * Make a contiguous gap in address for N bits starting at bit P.
35 *
36 * Example:
37 * address bits: [20:0]
38 * # of interleave bits (n): 3
39 * starting interleave bit (p): 8
40 *
41 * expanded address bits: [20+n : n+p][n+p-1 : p][p-1 : 0]
42 * [23 : 11][10 : 8][7 : 0]
43 */
make_space_for_coh_st_id_at_intlv_bit(struct addr_ctx * ctx)44 static u64 make_space_for_coh_st_id_at_intlv_bit(struct addr_ctx *ctx)
45 {
46 return expand_bits(ctx->map.intlv_bit_pos,
47 ctx->map.total_intlv_bits,
48 ctx->ret_addr);
49 }
50
51 /*
52 * Make two gaps in address for N bits.
53 * First gap is a single bit at bit P.
54 * Second gap is the remaining N-1 bits at bit 12.
55 *
56 * Example:
57 * address bits: [20:0]
58 * # of interleave bits (n): 3
59 * starting interleave bit (p): 8
60 *
61 * First gap
62 * expanded address bits: [20+1 : p+1][p][p-1 : 0]
63 * [21 : 9][8][7 : 0]
64 *
65 * Second gap uses result from first.
66 * r = n - 1; remaining interleave bits
67 * expanded address bits: [21+r : 12+r][12+r-1: 12][11 : 0]
68 * [23 : 14][13 : 12][11 : 0]
69 */
make_space_for_coh_st_id_split_2_1(struct addr_ctx * ctx)70 static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx)
71 {
72 /* Make a single space at the interleave bit. */
73 u64 denorm_addr = expand_bits(ctx->map.intlv_bit_pos, 1, ctx->ret_addr);
74
75 /* Done if there's only a single interleave bit. */
76 if (ctx->map.total_intlv_bits <= 1)
77 return denorm_addr;
78
79 /* Make spaces for the remaining interleave bits starting at bit 12. */
80 return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr);
81 }
82
83 /*
84 * Make space for CS ID at bits [14:8] as follows:
85 *
86 * 8 channels -> bits [10:8]
87 * 16 channels -> bits [11:8]
88 * 32 channels -> bits [14,11:8]
89 *
90 * 1 die -> N/A
91 * 2 dies -> bit [12]
92 * 4 dies -> bits [13:12]
93 */
make_space_for_coh_st_id_mi300(struct addr_ctx * ctx)94 static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx)
95 {
96 u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
97 u64 denorm_addr;
98
99 if (ctx->map.intlv_bit_pos != 8) {
100 pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos);
101 return ~0ULL;
102 }
103
104 /* Channel bits. Covers up to 4 bits at [11:8]. */
105 denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr);
106
107 /* Die bits. Always starts at [12]. */
108 denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr);
109
110 /* Additional channel bit at [14]. */
111 if (num_intlv_bits > 4)
112 denorm_addr = expand_bits(14, 1, denorm_addr);
113
114 return denorm_addr;
115 }
116
117 /*
118 * Take the current calculated address and shift enough bits in the middle
119 * to make a gap where the interleave bits will be inserted.
120 */
make_space_for_coh_st_id(struct addr_ctx * ctx)121 static u64 make_space_for_coh_st_id(struct addr_ctx *ctx)
122 {
123 switch (ctx->map.intlv_mode) {
124 case NOHASH_2CHAN:
125 case NOHASH_4CHAN:
126 case NOHASH_8CHAN:
127 case NOHASH_16CHAN:
128 case NOHASH_32CHAN:
129 case DF2_2CHAN_HASH:
130 return make_space_for_coh_st_id_at_intlv_bit(ctx);
131
132 case DF3_COD4_2CHAN_HASH:
133 case DF3_COD2_4CHAN_HASH:
134 case DF3_COD1_8CHAN_HASH:
135 case DF4_NPS4_2CHAN_HASH:
136 case DF4_NPS2_4CHAN_HASH:
137 case DF4_NPS1_8CHAN_HASH:
138 case DF4p5_NPS4_2CHAN_1K_HASH:
139 case DF4p5_NPS4_2CHAN_2K_HASH:
140 case DF4p5_NPS2_4CHAN_2K_HASH:
141 case DF4p5_NPS1_8CHAN_2K_HASH:
142 case DF4p5_NPS1_16CHAN_2K_HASH:
143 return make_space_for_coh_st_id_split_2_1(ctx);
144
145 case MI3_HASH_8CHAN:
146 case MI3_HASH_16CHAN:
147 case MI3_HASH_32CHAN:
148 return make_space_for_coh_st_id_mi300(ctx);
149
150 default:
151 atl_debug_on_bad_intlv_mode(ctx);
152 return ~0ULL;
153 }
154 }
155
get_coh_st_id_df2(struct addr_ctx * ctx)156 static u16 get_coh_st_id_df2(struct addr_ctx *ctx)
157 {
158 u8 num_socket_intlv_bits = ilog2(ctx->map.num_intlv_sockets);
159 u8 num_die_intlv_bits = ilog2(ctx->map.num_intlv_dies);
160 u8 num_intlv_bits;
161 u16 coh_st_id, mask;
162
163 coh_st_id = ctx->coh_st_fabric_id - get_dst_fabric_id(ctx);
164
165 /* Channel interleave bits */
166 num_intlv_bits = order_base_2(ctx->map.num_intlv_chan);
167 mask = GENMASK(num_intlv_bits - 1, 0);
168 coh_st_id &= mask;
169
170 /* Die interleave bits */
171 if (num_die_intlv_bits) {
172 u16 die_bits;
173
174 mask = GENMASK(num_die_intlv_bits - 1, 0);
175 die_bits = ctx->coh_st_fabric_id & df_cfg.die_id_mask;
176 die_bits >>= df_cfg.die_id_shift;
177
178 coh_st_id |= (die_bits & mask) << num_intlv_bits;
179 num_intlv_bits += num_die_intlv_bits;
180 }
181
182 /* Socket interleave bits */
183 if (num_socket_intlv_bits) {
184 u16 socket_bits;
185
186 mask = GENMASK(num_socket_intlv_bits - 1, 0);
187 socket_bits = ctx->coh_st_fabric_id & df_cfg.socket_id_mask;
188 socket_bits >>= df_cfg.socket_id_shift;
189
190 coh_st_id |= (socket_bits & mask) << num_intlv_bits;
191 }
192
193 return coh_st_id;
194 }
195
get_coh_st_id_df4(struct addr_ctx * ctx)196 static u16 get_coh_st_id_df4(struct addr_ctx *ctx)
197 {
198 /*
199 * Start with the original component mask and the number of interleave
200 * bits for the channels in this map.
201 */
202 u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
203 u16 mask = df_cfg.component_id_mask;
204
205 u16 socket_bits;
206
207 /* Set the derived Coherent Station ID to the input Coherent Station Fabric ID. */
208 u16 coh_st_id = ctx->coh_st_fabric_id & mask;
209
210 /*
211 * Subtract the "base" Destination Fabric ID.
212 * This accounts for systems with disabled Coherent Stations.
213 */
214 coh_st_id -= get_dst_fabric_id(ctx) & mask;
215
216 /*
217 * Generate and use a new mask based on the number of bits
218 * needed for channel interleaving in this map.
219 */
220 mask = GENMASK(num_intlv_bits - 1, 0);
221 coh_st_id &= mask;
222
223 /* Done if socket interleaving is not enabled. */
224 if (ctx->map.num_intlv_sockets <= 1)
225 return coh_st_id;
226
227 /*
228 * Figure out how many bits are needed for the number of
229 * interleaved sockets. And shift the derived Coherent Station ID to account
230 * for these.
231 */
232 num_intlv_bits = ilog2(ctx->map.num_intlv_sockets);
233 coh_st_id <<= num_intlv_bits;
234
235 /* Generate a new mask for the socket interleaving bits. */
236 mask = GENMASK(num_intlv_bits - 1, 0);
237
238 /* Get the socket interleave bits from the original Coherent Station Fabric ID. */
239 socket_bits = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask) >> df_cfg.socket_id_shift;
240
241 /* Apply the appropriate socket bits to the derived Coherent Station ID. */
242 coh_st_id |= socket_bits & mask;
243
244 return coh_st_id;
245 }
246
247 /*
248 * MI300 hash has:
249 * (C)hannel[3:0] = coh_st_id[3:0]
250 * (S)tack[0] = coh_st_id[4]
251 * (D)ie[1:0] = coh_st_id[6:5]
252 *
253 * Hashed coh_st_id is swizzled so that Stack bit is at the end.
254 * coh_st_id = SDDCCCC
255 */
get_coh_st_id_mi300(struct addr_ctx * ctx)256 static u16 get_coh_st_id_mi300(struct addr_ctx *ctx)
257 {
258 u8 channel_bits, die_bits, stack_bit;
259 u16 die_id;
260
261 /* Subtract the "base" Destination Fabric ID. */
262 ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx);
263
264 die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift;
265
266 channel_bits = FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id);
267 stack_bit = FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6;
268 die_bits = die_id << 4;
269
270 return stack_bit | die_bits | channel_bits;
271 }
272
273 /*
274 * Derive the correct Coherent Station ID that represents the interleave bits
275 * used within the system physical address. This accounts for the
276 * interleave mode, number of interleaved channels/dies/sockets, and
277 * other system/mode-specific bit swizzling.
278 *
279 * Returns: Coherent Station ID on success.
280 * All bits set on error.
281 */
calculate_coh_st_id(struct addr_ctx * ctx)282 static u16 calculate_coh_st_id(struct addr_ctx *ctx)
283 {
284 switch (ctx->map.intlv_mode) {
285 case NOHASH_2CHAN:
286 case NOHASH_4CHAN:
287 case NOHASH_8CHAN:
288 case NOHASH_16CHAN:
289 case NOHASH_32CHAN:
290 case DF3_COD4_2CHAN_HASH:
291 case DF3_COD2_4CHAN_HASH:
292 case DF3_COD1_8CHAN_HASH:
293 case DF2_2CHAN_HASH:
294 return get_coh_st_id_df2(ctx);
295
296 case DF4_NPS4_2CHAN_HASH:
297 case DF4_NPS2_4CHAN_HASH:
298 case DF4_NPS1_8CHAN_HASH:
299 case DF4p5_NPS4_2CHAN_1K_HASH:
300 case DF4p5_NPS4_2CHAN_2K_HASH:
301 case DF4p5_NPS2_4CHAN_2K_HASH:
302 case DF4p5_NPS1_8CHAN_2K_HASH:
303 case DF4p5_NPS1_16CHAN_2K_HASH:
304 return get_coh_st_id_df4(ctx);
305
306 case MI3_HASH_8CHAN:
307 case MI3_HASH_16CHAN:
308 case MI3_HASH_32CHAN:
309 return get_coh_st_id_mi300(ctx);
310
311 /* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */
312 case DF4p5_NPS2_4CHAN_1K_HASH:
313 case DF4p5_NPS1_8CHAN_1K_HASH:
314 case DF4p5_NPS1_16CHAN_1K_HASH:
315 return ctx->coh_st_fabric_id - get_dst_fabric_id(ctx);
316
317 default:
318 atl_debug_on_bad_intlv_mode(ctx);
319 return ~0;
320 }
321 }
322
/*
 * Insert the whole COH_ST ID into the address, with its bit 0 landing on the
 * map's interleave bit position.
 *
 * The ID is widened to 64 bits before shifting. A u16 operand would
 * otherwise be promoted to a signed int, and a shift that reaches the sign
 * bit would be undefined and could sign-extend into the upper address bits.
 *
 * @denorm_addr: address that already has a gap at the interleave position.
 * @coh_st_id:   value to place in the gap.
 *
 * Returns: @denorm_addr with @coh_st_id OR'ed in.
 */
static u64 insert_coh_st_id_at_intlv_bit(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
{
	return denorm_addr | ((u64)coh_st_id << ctx->map.intlv_bit_pos);
}
327
/*
 * Insert the COH_ST ID into an address using the "split 2_1" layout:
 * coh_st_id[0] goes to the map's interleave bit position and
 * coh_st_id[2:1] goes to bits [13:12].
 *
 * Returns: @denorm_addr with the selected ID bits OR'ed in.
 */
static u64 insert_coh_st_id_split_2_1(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
{
	u64 id_bit0 = coh_st_id & BIT(0);
	u64 id_bits_2_1 = coh_st_id & GENMASK(2, 1);

	/* Bits [2:1] start at position 1, so shifting by 11 puts them at bit 12. */
	return denorm_addr | (id_bit0 << ctx->map.intlv_bit_pos) | (id_bits_2_1 << 11);
}
338
/*
 * Insert the COH_ST ID into an address using the "split 2_2" layout:
 * coh_st_id[1:0] goes to bits [9:8] and coh_st_id[3:2] goes to bits [13:12].
 *
 * The upper part may be one or two bits wide. Both bits are always taken
 * because bit 3 is clear when it is not in use.
 *
 * Returns: @denorm_addr with the selected ID bits OR'ed in.
 */
static u64 insert_coh_st_id_split_2_2(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
{
	u64 id_bits_1_0 = coh_st_id & GENMASK(1, 0);
	u64 id_bits_3_2 = coh_st_id & GENMASK(3, 2);

	/* Bits [3:2] start at position 2, so shifting by 10 puts them at bit 12. */
	return denorm_addr | (id_bits_1_0 << 8) | (id_bits_3_2 << 10);
}
352
/*
 * Place the COH_ST ID into the gap(s) of a denormalized address, using the
 * bit layout that goes with the map's interleave mode.
 *
 * Returns: the address with the ID inserted, or all bits set for an
 *	    interleave mode that is not handled here.
 */
static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
{
	u64 result;

	switch (ctx->map.intlv_mode) {
	/* Whole ID in one piece at the interleave bit. */
	case NOHASH_2CHAN:
	case NOHASH_4CHAN:
	case NOHASH_8CHAN:
	case NOHASH_16CHAN:
	case NOHASH_32CHAN:
	case MI3_HASH_8CHAN:
	case MI3_HASH_16CHAN:
	case MI3_HASH_32CHAN:
	case DF2_2CHAN_HASH:
		result = insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id);
		break;

	/* One bit at the interleave bit, the rest at bit 12. */
	case DF3_COD4_2CHAN_HASH:
	case DF3_COD2_4CHAN_HASH:
	case DF3_COD1_8CHAN_HASH:
	case DF4_NPS4_2CHAN_HASH:
	case DF4_NPS2_4CHAN_HASH:
	case DF4_NPS1_8CHAN_HASH:
	case DF4p5_NPS4_2CHAN_1K_HASH:
	case DF4p5_NPS4_2CHAN_2K_HASH:
	case DF4p5_NPS2_4CHAN_2K_HASH:
	case DF4p5_NPS1_8CHAN_2K_HASH:
	case DF4p5_NPS1_16CHAN_2K_HASH:
		result = insert_coh_st_id_split_2_1(ctx, denorm_addr, coh_st_id);
		break;

	/* Two bits at bit 8, the rest at bit 12. */
	case DF4p5_NPS2_4CHAN_1K_HASH:
	case DF4p5_NPS1_8CHAN_1K_HASH:
	case DF4p5_NPS1_16CHAN_1K_HASH:
		result = insert_coh_st_id_split_2_2(ctx, denorm_addr, coh_st_id);
		break;

	default:
		atl_debug_on_bad_intlv_mode(ctx);
		result = ~0ULL;
		break;
	}

	return result;
}
390
391 /*
392 * MI300 systems have a fixed, hardware-defined physical-to-logical
393 * Coherent Station mapping. The Remap registers are not used.
394 */
395 static const u16 phy_to_log_coh_st_map_mi300[] = {
396 12, 13, 14, 15,
397 8, 9, 10, 11,
398 4, 5, 6, 7,
399 0, 1, 2, 3,
400 28, 29, 30, 31,
401 24, 25, 26, 27,
402 20, 21, 22, 23,
403 16, 17, 18, 19,
404 };
405
get_logical_coh_st_fabric_id_mi300(struct addr_ctx * ctx)406 static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx)
407 {
408 if (ctx->inst_id >= ARRAY_SIZE(phy_to_log_coh_st_map_mi300)) {
409 atl_debug(ctx, "Instance ID out of range");
410 return ~0;
411 }
412
413 return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift);
414 }
415
get_logical_coh_st_fabric_id(struct addr_ctx * ctx)416 static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
417 {
418 u16 component_id, log_fabric_id;
419
420 /* Start with the physical COH_ST Fabric ID. */
421 u16 phys_fabric_id = ctx->coh_st_fabric_id;
422
423 if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
424 return get_logical_coh_st_fabric_id_mi300(ctx);
425
426 /* Skip logical ID lookup if remapping is disabled. */
427 if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) &&
428 ctx->map.intlv_mode != DF3_6CHAN)
429 return phys_fabric_id;
430
431 /* Mask off the Node ID bits to get the "local" Component ID. */
432 component_id = phys_fabric_id & df_cfg.component_id_mask;
433
434 /*
435 * Search the list of logical Component IDs for the one that
436 * matches this physical Component ID.
437 */
438 for (log_fabric_id = 0; log_fabric_id < MAX_COH_ST_CHANNELS; log_fabric_id++) {
439 if (ctx->map.remap_array[log_fabric_id] == component_id)
440 break;
441 }
442
443 if (log_fabric_id == MAX_COH_ST_CHANNELS)
444 atl_debug(ctx, "COH_ST remap entry not found for 0x%x",
445 log_fabric_id);
446
447 /* Get the Node ID bits from the physical and apply to the logical. */
448 return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id;
449 }
450
get_logical_coh_st_fabric_id_for_current_spa(struct addr_ctx * ctx,struct df4p5_denorm_ctx * denorm_ctx)451 static u16 get_logical_coh_st_fabric_id_for_current_spa(struct addr_ctx *ctx,
452 struct df4p5_denorm_ctx *denorm_ctx)
453 {
454 bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T;
455 bool hash_pa8, hash_pa9, hash_pa12, hash_pa13;
456 u64 cs_id = 0;
457
458 hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
459 hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
460 hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
461 hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
462
463 hash_pa8 = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa);
464 hash_pa8 ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
465 hash_pa8 ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
466 hash_pa8 ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
467 hash_pa8 ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
468 hash_pa8 ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
469
470 hash_pa9 = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa);
471 hash_pa9 ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
472 hash_pa9 ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
473 hash_pa9 ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
474 hash_pa9 ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
475
476 hash_pa12 = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
477 hash_pa12 ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
478 hash_pa12 ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
479 hash_pa12 ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
480 hash_pa12 ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
481
482 hash_pa13 = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
483 hash_pa13 ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
484 hash_pa13 ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
485 hash_pa13 ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
486 hash_pa13 ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
487
488 switch (ctx->map.intlv_mode) {
489 case DF4p5_NPS0_24CHAN_1K_HASH:
490 cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
491 cs_id %= denorm_ctx->mod_value;
492 cs_id <<= 2;
493 cs_id |= (hash_pa9 | (hash_pa12 << 1));
494 cs_id |= hash_pa8 << df_cfg.socket_id_shift;
495 break;
496
497 case DF4p5_NPS0_24CHAN_2K_HASH:
498 cs_id = FIELD_GET(GENMASK_ULL(63, 14), denorm_ctx->current_spa) << 4;
499 cs_id %= denorm_ctx->mod_value;
500 cs_id <<= 2;
501 cs_id |= (hash_pa12 | (hash_pa13 << 1));
502 cs_id |= hash_pa8 << df_cfg.socket_id_shift;
503 break;
504
505 case DF4p5_NPS1_12CHAN_1K_HASH:
506 cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
507 cs_id %= denorm_ctx->mod_value;
508 cs_id <<= 2;
509 cs_id |= (hash_pa8 | (hash_pa9 << 1));
510 break;
511
512 case DF4p5_NPS1_12CHAN_2K_HASH:
513 cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
514 cs_id %= denorm_ctx->mod_value;
515 cs_id <<= 2;
516 cs_id |= (hash_pa8 | (hash_pa12 << 1));
517 break;
518
519 case DF4p5_NPS2_6CHAN_1K_HASH:
520 case DF4p5_NPS1_10CHAN_1K_HASH:
521 cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
522 cs_id |= (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) << 1);
523 cs_id %= denorm_ctx->mod_value;
524 cs_id <<= 1;
525 cs_id |= hash_pa8;
526 break;
527
528 case DF4p5_NPS2_6CHAN_2K_HASH:
529 case DF4p5_NPS1_10CHAN_2K_HASH:
530 cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
531 cs_id %= denorm_ctx->mod_value;
532 cs_id <<= 1;
533 cs_id |= hash_pa8;
534 break;
535
536 case DF4p5_NPS4_3CHAN_1K_HASH:
537 case DF4p5_NPS2_5CHAN_1K_HASH:
538 cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
539 cs_id |= FIELD_GET(GENMASK_ULL(9, 8), denorm_ctx->current_spa);
540 cs_id %= denorm_ctx->mod_value;
541 break;
542
543 case DF4p5_NPS4_3CHAN_2K_HASH:
544 case DF4p5_NPS2_5CHAN_2K_HASH:
545 cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
546 cs_id |= FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) << 1;
547 cs_id %= denorm_ctx->mod_value;
548 break;
549
550 default:
551 atl_debug_on_bad_intlv_mode(ctx);
552 return 0;
553 }
554
555 if (cs_id > 0xffff) {
556 atl_debug(ctx, "Translation error: Resulting cs_id larger than u16\n");
557 return 0;
558 }
559
560 return cs_id;
561 }
562
denorm_addr_common(struct addr_ctx * ctx)563 static int denorm_addr_common(struct addr_ctx *ctx)
564 {
565 u64 denorm_addr;
566 u16 coh_st_id;
567
568 /*
569 * Convert the original physical COH_ST Fabric ID to a logical value.
570 * This is required for non-power-of-two and other interleaving modes.
571 */
572 ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx);
573
574 denorm_addr = make_space_for_coh_st_id(ctx);
575 coh_st_id = calculate_coh_st_id(ctx);
576 ctx->ret_addr = insert_coh_st_id(ctx, denorm_addr, coh_st_id);
577 return 0;
578 }
579
denorm_addr_df3_6chan(struct addr_ctx * ctx)580 static int denorm_addr_df3_6chan(struct addr_ctx *ctx)
581 {
582 u16 coh_st_id = ctx->coh_st_fabric_id & df_cfg.component_id_mask;
583 u8 total_intlv_bits = ctx->map.total_intlv_bits;
584 u8 low_bit, intlv_bit = ctx->map.intlv_bit_pos;
585 u64 msb_intlv_bits, temp_addr_a, temp_addr_b;
586 u8 np2_bits = ctx->map.np2_bits;
587
588 if (ctx->map.intlv_mode != DF3_6CHAN)
589 return -EINVAL;
590
591 /*
592 * 'np2_bits' holds the number of bits needed to cover the
593 * amount of memory (rounded up) in this map using 64K chunks.
594 *
595 * Example:
596 * Total memory in map: 6GB
597 * Rounded up to next power-of-2: 8GB
598 * Number of 64K chunks: 0x20000
599 * np2_bits = log2(# of chunks): 17
600 *
601 * Get the two most-significant interleave bits from the
602 * input address based on the following:
603 *
604 * [15 + np2_bits - total_intlv_bits : 14 + np2_bits - total_intlv_bits]
605 */
606 low_bit = 14 + np2_bits - total_intlv_bits;
607 msb_intlv_bits = ctx->ret_addr >> low_bit;
608 msb_intlv_bits &= 0x3;
609
610 /*
611 * If MSB are 11b, then logical COH_ST ID is 6 or 7.
612 * Need to adjust based on the mod3 result.
613 */
614 if (msb_intlv_bits == 3) {
615 u8 addr_mod, phys_addr_msb, msb_coh_st_id;
616
617 /* Get the remaining interleave bits from the input address. */
618 temp_addr_b = GENMASK_ULL(low_bit - 1, intlv_bit) & ctx->ret_addr;
619 temp_addr_b >>= intlv_bit;
620
621 /* Calculate the logical COH_ST offset based on mod3. */
622 addr_mod = temp_addr_b % 3;
623
624 /* Get COH_ST ID bits [2:1]. */
625 msb_coh_st_id = (coh_st_id >> 1) & 0x3;
626
627 /* Get the bit that starts the physical address bits. */
628 phys_addr_msb = (intlv_bit + np2_bits + 1);
629 phys_addr_msb &= BIT(0);
630 phys_addr_msb++;
631 phys_addr_msb *= 3 - addr_mod + msb_coh_st_id;
632 phys_addr_msb %= 3;
633
634 /* Move the physical address MSB to the correct place. */
635 temp_addr_b |= phys_addr_msb << (low_bit - total_intlv_bits - intlv_bit);
636
637 /* Generate a new COH_ST ID as follows: coh_st_id = [1, 1, coh_st_id[0]] */
638 coh_st_id &= BIT(0);
639 coh_st_id |= GENMASK(2, 1);
640 } else {
641 temp_addr_b = GENMASK_ULL(63, intlv_bit) & ctx->ret_addr;
642 temp_addr_b >>= intlv_bit;
643 }
644
645 temp_addr_a = GENMASK_ULL(intlv_bit - 1, 0) & ctx->ret_addr;
646 temp_addr_b <<= intlv_bit + total_intlv_bits;
647
648 ctx->ret_addr = temp_addr_a | temp_addr_b;
649 ctx->ret_addr |= coh_st_id << intlv_bit;
650 return 0;
651 }
652
denorm_addr_df4_np2(struct addr_ctx * ctx)653 static int denorm_addr_df4_np2(struct addr_ctx *ctx)
654 {
655 bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
656 u16 group, group_offset, log_coh_st_offset;
657 unsigned int mod_value, shift_value;
658 u16 mask = df_cfg.component_id_mask;
659 u64 temp_addr_a, temp_addr_b;
660 bool hash_pa8, hashed_bit;
661
662 switch (ctx->map.intlv_mode) {
663 case DF4_NPS4_3CHAN_HASH:
664 mod_value = 3;
665 shift_value = 13;
666 break;
667 case DF4_NPS2_6CHAN_HASH:
668 mod_value = 3;
669 shift_value = 12;
670 break;
671 case DF4_NPS1_12CHAN_HASH:
672 mod_value = 3;
673 shift_value = 11;
674 break;
675 case DF4_NPS2_5CHAN_HASH:
676 mod_value = 5;
677 shift_value = 13;
678 break;
679 case DF4_NPS1_10CHAN_HASH:
680 mod_value = 5;
681 shift_value = 12;
682 break;
683 default:
684 atl_debug_on_bad_intlv_mode(ctx);
685 return -EINVAL;
686 };
687
688 if (ctx->map.num_intlv_sockets == 1) {
689 hash_pa8 = BIT_ULL(shift_value) & ctx->ret_addr;
690 temp_addr_a = remove_bits(shift_value, shift_value, ctx->ret_addr);
691 } else {
692 hash_pa8 = ctx->coh_st_fabric_id & df_cfg.socket_id_mask;
693 temp_addr_a = ctx->ret_addr;
694 }
695
696 /* Make a gap for the real bit [8]. */
697 temp_addr_a = expand_bits(8, 1, temp_addr_a);
698
699 /* Make an additional gap for bits [13:12], as appropriate.*/
700 if (ctx->map.intlv_mode == DF4_NPS2_6CHAN_HASH ||
701 ctx->map.intlv_mode == DF4_NPS1_10CHAN_HASH) {
702 temp_addr_a = expand_bits(13, 1, temp_addr_a);
703 } else if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) {
704 temp_addr_a = expand_bits(12, 2, temp_addr_a);
705 }
706
707 /* Keep bits [13:0]. */
708 temp_addr_a &= GENMASK_ULL(13, 0);
709
710 /* Get the appropriate high bits. */
711 shift_value += 1 - ilog2(ctx->map.num_intlv_sockets);
712 temp_addr_b = GENMASK_ULL(63, shift_value) & ctx->ret_addr;
713 temp_addr_b >>= shift_value;
714 temp_addr_b *= mod_value;
715
716 /*
717 * Coherent Stations are divided into groups.
718 *
719 * Multiples of 3 (mod3) are divided into quadrants.
720 * e.g. NP4_3CHAN -> [0, 1, 2] [6, 7, 8]
721 * [3, 4, 5] [9, 10, 11]
722 *
723 * Multiples of 5 (mod5) are divided into sides.
724 * e.g. NP2_5CHAN -> [0, 1, 2, 3, 4] [5, 6, 7, 8, 9]
725 */
726
727 /*
728 * Calculate the logical offset for the COH_ST within its DRAM Address map.
729 * e.g. if map includes [5, 6, 7, 8, 9] and target instance is '8', then
730 * log_coh_st_offset = 8 - 5 = 3
731 */
732 log_coh_st_offset = (ctx->coh_st_fabric_id & mask) - (get_dst_fabric_id(ctx) & mask);
733
734 /*
735 * Figure out the group number.
736 *
737 * Following above example,
738 * log_coh_st_offset = 3
739 * mod_value = 5
740 * group = 3 / 5 = 0
741 */
742 group = log_coh_st_offset / mod_value;
743
744 /*
745 * Figure out the offset within the group.
746 *
747 * Following above example,
748 * log_coh_st_offset = 3
749 * mod_value = 5
750 * group_offset = 3 % 5 = 3
751 */
752 group_offset = log_coh_st_offset % mod_value;
753
754 /* Adjust group_offset if the hashed bit [8] is set. */
755 if (hash_pa8) {
756 if (!group_offset)
757 group_offset = mod_value - 1;
758 else
759 group_offset--;
760 }
761
762 /* Add in the group offset to the high bits. */
763 temp_addr_b += group_offset;
764
765 /* Shift the high bits to the proper starting position. */
766 temp_addr_b <<= 14;
767
768 /* Combine the high and low bits together. */
769 ctx->ret_addr = temp_addr_a | temp_addr_b;
770
771 /* Account for hashing here instead of in dehash_address(). */
772 hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
773 hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
774 hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
775
776 hashed_bit = !!hash_pa8;
777 hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr);
778 hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k;
779 hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M;
780 hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G;
781
782 ctx->ret_addr |= hashed_bit << 8;
783
784 /* Done for 3 and 5 channel. */
785 if (ctx->map.intlv_mode == DF4_NPS4_3CHAN_HASH ||
786 ctx->map.intlv_mode == DF4_NPS2_5CHAN_HASH)
787 return 0;
788
789 /* Select the proper 'group' bit to use for Bit 13. */
790 if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH)
791 hashed_bit = !!(group & BIT(1));
792 else
793 hashed_bit = group & BIT(0);
794
795 hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k;
796 hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M;
797 hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G;
798
799 ctx->ret_addr |= hashed_bit << 13;
800
801 /* Done for 6 and 10 channel. */
802 if (ctx->map.intlv_mode != DF4_NPS1_12CHAN_HASH)
803 return 0;
804
805 hashed_bit = group & BIT(0);
806 hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k;
807 hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M;
808 hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G;
809
810 ctx->ret_addr |= hashed_bit << 12;
811 return 0;
812 }
813
/*
 * Normalize @addr for the DF4.5 non-power-of-2 interleave modes.
 *
 * The result is the OR of three parts:
 *   - denorm_ctx->base_denorm_addr,
 *   - low bits taken from @addr: bits [11:10] for the 1K hash modes or
 *     bits [11:9] for the 2K hash modes, placed at bit 8,
 *   - a mode-specific selection of the remaining address bits divided by
 *     denorm_ctx->mod_value, placed at bit 10 (1K modes) or bit 11 (2K modes).
 *
 * Returns: the normalized address, or 0 if the interleave mode is not
 *	    handled here.
 */
static u64 normalize_addr_df4p5_np2(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx,
				    u64 addr)
{
	unsigned int high_shift;
	u64 low_bits, dividend;
	bool hash_2k;

	/* Pick the bits that get divided by the mod value, and the hash granularity. */
	switch (ctx->map.intlv_mode) {
	case DF4p5_NPS0_24CHAN_1K_HASH:
		hash_2k = false;
		dividend = FIELD_GET(GENMASK_ULL(63, 13), addr);
		break;

	case DF4p5_NPS0_24CHAN_2K_HASH:
		hash_2k = true;
		dividend = FIELD_GET(GENMASK_ULL(63, 14), addr);
		break;

	case DF4p5_NPS1_12CHAN_1K_HASH:
		hash_2k = false;
		dividend = FIELD_GET(GENMASK_ULL(63, 12), addr);
		break;

	case DF4p5_NPS1_12CHAN_2K_HASH:
		hash_2k = true;
		dividend = FIELD_GET(GENMASK_ULL(63, 13), addr);
		break;

	case DF4p5_NPS2_6CHAN_1K_HASH:
	case DF4p5_NPS1_10CHAN_1K_HASH:
		hash_2k = false;
		dividend = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
		dividend |= FIELD_GET(BIT_ULL(9), addr);
		break;

	case DF4p5_NPS2_6CHAN_2K_HASH:
	case DF4p5_NPS1_10CHAN_2K_HASH:
		hash_2k = true;
		dividend = FIELD_GET(GENMASK_ULL(63, 12), addr);
		break;

	case DF4p5_NPS4_3CHAN_1K_HASH:
	case DF4p5_NPS2_5CHAN_1K_HASH:
		hash_2k = false;
		dividend = FIELD_GET(GENMASK_ULL(63, 12), addr) << 2;
		dividend |= FIELD_GET(GENMASK_ULL(9, 8), addr);
		break;

	case DF4p5_NPS4_3CHAN_2K_HASH:
	case DF4p5_NPS2_5CHAN_2K_HASH:
		hash_2k = true;
		dividend = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
		dividend |= FIELD_GET(BIT_ULL(8), addr);
		break;

	default:
		atl_debug_on_bad_intlv_mode(ctx);
		return 0;
	}

	/* 2K modes keep one more low bit, so the quotient starts one bit higher. */
	if (hash_2k) {
		low_bits = FIELD_GET(GENMASK_ULL(11, 9), addr) << 8;
		high_shift = 11;
	} else {
		low_bits = FIELD_GET(GENMASK_ULL(11, 10), addr) << 8;
		high_shift = 10;
	}

	return denorm_ctx->base_denorm_addr | low_bits |
	       ((dividend / denorm_ctx->mod_value) << high_shift);
}
901
recalculate_hashed_bits_df4p5_np2(struct addr_ctx * ctx,struct df4p5_denorm_ctx * denorm_ctx)902 static void recalculate_hashed_bits_df4p5_np2(struct addr_ctx *ctx,
903 struct df4p5_denorm_ctx *denorm_ctx)
904 {
905 bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T, hashed_bit;
906
907 if (!denorm_ctx->rehash_vector)
908 return;
909
910 hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
911 hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
912 hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
913 hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
914
915 if (denorm_ctx->rehash_vector & BIT_ULL(8)) {
916 hashed_bit = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa);
917 hashed_bit ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
918 hashed_bit ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
919 hashed_bit ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
920 hashed_bit ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
921 hashed_bit ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
922
923 if (FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) != hashed_bit)
924 denorm_ctx->current_spa ^= BIT_ULL(8);
925 }
926
927 if (denorm_ctx->rehash_vector & BIT_ULL(9)) {
928 hashed_bit = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa);
929 hashed_bit ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
930 hashed_bit ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
931 hashed_bit ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
932 hashed_bit ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
933
934 if (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) != hashed_bit)
935 denorm_ctx->current_spa ^= BIT_ULL(9);
936 }
937
938 if (denorm_ctx->rehash_vector & BIT_ULL(12)) {
939 hashed_bit = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
940 hashed_bit ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
941 hashed_bit ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
942 hashed_bit ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
943 hashed_bit ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
944
945 if (FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa) != hashed_bit)
946 denorm_ctx->current_spa ^= BIT_ULL(12);
947 }
948
949 if (denorm_ctx->rehash_vector & BIT_ULL(13)) {
950 hashed_bit = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
951 hashed_bit ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
952 hashed_bit ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
953 hashed_bit ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
954 hashed_bit ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
955
956 if (FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa) != hashed_bit)
957 denorm_ctx->current_spa ^= BIT_ULL(13);
958 }
959 }
960
match_logical_coh_st_fabric_id(struct addr_ctx * ctx,struct df4p5_denorm_ctx * denorm_ctx)961 static bool match_logical_coh_st_fabric_id(struct addr_ctx *ctx,
962 struct df4p5_denorm_ctx *denorm_ctx)
963 {
964 /*
965 * The logical CS fabric ID of the permutation must be calculated from the
966 * current SPA with the base and with the MMIO hole.
967 */
968 u16 id = get_logical_coh_st_fabric_id_for_current_spa(ctx, denorm_ctx);
969
970 atl_debug(ctx, "Checking calculated logical coherent station fabric id:\n");
971 atl_debug(ctx, " calculated fabric id = 0x%x\n", id);
972 atl_debug(ctx, " expected fabric id = 0x%x\n", denorm_ctx->coh_st_fabric_id);
973
974 return denorm_ctx->coh_st_fabric_id == id;
975 }
976
match_norm_addr(struct addr_ctx * ctx,struct df4p5_denorm_ctx * denorm_ctx)977 static bool match_norm_addr(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
978 {
979 u64 addr = remove_base_and_hole(ctx, denorm_ctx->current_spa);
980
981 /*
982 * The normalized address must be calculated with the current SPA without
983 * the base and without the MMIO hole.
984 */
985 addr = normalize_addr_df4p5_np2(ctx, denorm_ctx, addr);
986
987 atl_debug(ctx, "Checking calculated normalized address:\n");
988 atl_debug(ctx, " calculated normalized addr = 0x%016llx\n", addr);
989 atl_debug(ctx, " expected normalized addr = 0x%016llx\n", ctx->ret_addr);
990
991 return addr == ctx->ret_addr;
992 }
993
check_permutations(struct addr_ctx * ctx,struct df4p5_denorm_ctx * denorm_ctx)994 static int check_permutations(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
995 {
996 u64 test_perm, temp_addr, denorm_addr, num_perms;
997 unsigned int dropped_remainder;
998
999 denorm_ctx->div_addr *= denorm_ctx->mod_value;
1000
1001 /*
1002 * The high order bits of num_permutations represent the permutations
1003 * of the dropped remainder. This will be either 0-3 or 0-5 depending
1004 * on the interleave mode. The low order bits represent the
1005 * permutations of other "lost" bits which will be any combination of
1006 * 1, 2, or 3 bits depending on the interleave mode.
1007 */
1008 num_perms = denorm_ctx->mod_value << denorm_ctx->perm_shift;
1009
1010 for (test_perm = 0; test_perm < num_perms; test_perm++) {
1011 denorm_addr = denorm_ctx->base_denorm_addr;
1012 dropped_remainder = test_perm >> denorm_ctx->perm_shift;
1013 temp_addr = denorm_ctx->div_addr + dropped_remainder;
1014
1015 switch (ctx->map.intlv_mode) {
1016 case DF4p5_NPS0_24CHAN_2K_HASH:
1017 denorm_addr |= temp_addr << 14;
1018 break;
1019
1020 case DF4p5_NPS0_24CHAN_1K_HASH:
1021 case DF4p5_NPS1_12CHAN_2K_HASH:
1022 denorm_addr |= temp_addr << 13;
1023 break;
1024
1025 case DF4p5_NPS1_12CHAN_1K_HASH:
1026 case DF4p5_NPS2_6CHAN_2K_HASH:
1027 case DF4p5_NPS1_10CHAN_2K_HASH:
1028 denorm_addr |= temp_addr << 12;
1029 break;
1030
1031 case DF4p5_NPS2_6CHAN_1K_HASH:
1032 case DF4p5_NPS1_10CHAN_1K_HASH:
1033 denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 9;
1034 denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
1035 break;
1036
1037 case DF4p5_NPS4_3CHAN_1K_HASH:
1038 case DF4p5_NPS2_5CHAN_1K_HASH:
1039 denorm_addr |= FIELD_GET(GENMASK_ULL(1, 0), temp_addr) << 8;
1040 denorm_addr |= FIELD_GET(GENMASK_ULL(63, 2), (temp_addr)) << 12;
1041 break;
1042
1043 case DF4p5_NPS4_3CHAN_2K_HASH:
1044 case DF4p5_NPS2_5CHAN_2K_HASH:
1045 denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 8;
1046 denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
1047 break;
1048
1049 default:
1050 atl_debug_on_bad_intlv_mode(ctx);
1051 return -EINVAL;
1052 }
1053
1054 switch (ctx->map.intlv_mode) {
1055 case DF4p5_NPS0_24CHAN_1K_HASH:
1056 denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1057 denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
1058 denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 12;
1059 break;
1060
1061 case DF4p5_NPS0_24CHAN_2K_HASH:
1062 denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1063 denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
1064 denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 13;
1065 break;
1066
1067 case DF4p5_NPS1_12CHAN_2K_HASH:
1068 denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1069 denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
1070 break;
1071
1072 case DF4p5_NPS1_12CHAN_1K_HASH:
1073 case DF4p5_NPS4_3CHAN_1K_HASH:
1074 case DF4p5_NPS2_5CHAN_1K_HASH:
1075 denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1076 denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
1077 break;
1078
1079 case DF4p5_NPS2_6CHAN_1K_HASH:
1080 case DF4p5_NPS2_6CHAN_2K_HASH:
1081 case DF4p5_NPS4_3CHAN_2K_HASH:
1082 case DF4p5_NPS1_10CHAN_1K_HASH:
1083 case DF4p5_NPS1_10CHAN_2K_HASH:
1084 case DF4p5_NPS2_5CHAN_2K_HASH:
1085 denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1086 break;
1087
1088 default:
1089 atl_debug_on_bad_intlv_mode(ctx);
1090 return -EINVAL;
1091 }
1092
1093 denorm_ctx->current_spa = add_base_and_hole(ctx, denorm_addr);
1094 recalculate_hashed_bits_df4p5_np2(ctx, denorm_ctx);
1095
1096 atl_debug(ctx, "Checking potential system physical address 0x%016llx\n",
1097 denorm_ctx->current_spa);
1098
1099 if (!match_logical_coh_st_fabric_id(ctx, denorm_ctx))
1100 continue;
1101
1102 if (!match_norm_addr(ctx, denorm_ctx))
1103 continue;
1104
1105 if (denorm_ctx->resolved_spa == INVALID_SPA ||
1106 denorm_ctx->current_spa > denorm_ctx->resolved_spa)
1107 denorm_ctx->resolved_spa = denorm_ctx->current_spa;
1108 }
1109
1110 if (denorm_ctx->resolved_spa == INVALID_SPA) {
1111 atl_debug(ctx, "Failed to find valid SPA for normalized address 0x%016llx\n",
1112 ctx->ret_addr);
1113 return -EINVAL;
1114 }
1115
1116 /* Return the resolved SPA without the base, without the MMIO hole */
1117 ctx->ret_addr = remove_base_and_hole(ctx, denorm_ctx->resolved_spa);
1118
1119 return 0;
1120 }
1121
init_df4p5_denorm_ctx(struct addr_ctx * ctx,struct df4p5_denorm_ctx * denorm_ctx)1122 static int init_df4p5_denorm_ctx(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
1123 {
1124 denorm_ctx->current_spa = INVALID_SPA;
1125 denorm_ctx->resolved_spa = INVALID_SPA;
1126
1127 switch (ctx->map.intlv_mode) {
1128 case DF4p5_NPS0_24CHAN_1K_HASH:
1129 denorm_ctx->perm_shift = 3;
1130 denorm_ctx->rehash_vector = BIT(8) | BIT(9) | BIT(12);
1131 break;
1132
1133 case DF4p5_NPS0_24CHAN_2K_HASH:
1134 denorm_ctx->perm_shift = 3;
1135 denorm_ctx->rehash_vector = BIT(8) | BIT(12) | BIT(13);
1136 break;
1137
1138 case DF4p5_NPS1_12CHAN_1K_HASH:
1139 denorm_ctx->perm_shift = 2;
1140 denorm_ctx->rehash_vector = BIT(8);
1141 break;
1142
1143 case DF4p5_NPS1_12CHAN_2K_HASH:
1144 denorm_ctx->perm_shift = 2;
1145 denorm_ctx->rehash_vector = BIT(8) | BIT(12);
1146 break;
1147
1148 case DF4p5_NPS2_6CHAN_1K_HASH:
1149 case DF4p5_NPS2_6CHAN_2K_HASH:
1150 case DF4p5_NPS1_10CHAN_1K_HASH:
1151 case DF4p5_NPS1_10CHAN_2K_HASH:
1152 denorm_ctx->perm_shift = 1;
1153 denorm_ctx->rehash_vector = BIT(8);
1154 break;
1155
1156 case DF4p5_NPS4_3CHAN_1K_HASH:
1157 case DF4p5_NPS2_5CHAN_1K_HASH:
1158 denorm_ctx->perm_shift = 2;
1159 denorm_ctx->rehash_vector = 0;
1160 break;
1161
1162 case DF4p5_NPS4_3CHAN_2K_HASH:
1163 case DF4p5_NPS2_5CHAN_2K_HASH:
1164 denorm_ctx->perm_shift = 1;
1165 denorm_ctx->rehash_vector = 0;
1166 break;
1167
1168 default:
1169 atl_debug_on_bad_intlv_mode(ctx);
1170 return -EINVAL;
1171 }
1172
1173 denorm_ctx->base_denorm_addr = FIELD_GET(GENMASK_ULL(7, 0), ctx->ret_addr);
1174
1175 switch (ctx->map.intlv_mode) {
1176 case DF4p5_NPS0_24CHAN_1K_HASH:
1177 case DF4p5_NPS1_12CHAN_1K_HASH:
1178 case DF4p5_NPS2_6CHAN_1K_HASH:
1179 case DF4p5_NPS4_3CHAN_1K_HASH:
1180 case DF4p5_NPS1_10CHAN_1K_HASH:
1181 case DF4p5_NPS2_5CHAN_1K_HASH:
1182 denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(9, 8), ctx->ret_addr) << 10;
1183 denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 10), ctx->ret_addr);
1184 break;
1185
1186 case DF4p5_NPS0_24CHAN_2K_HASH:
1187 case DF4p5_NPS1_12CHAN_2K_HASH:
1188 case DF4p5_NPS2_6CHAN_2K_HASH:
1189 case DF4p5_NPS4_3CHAN_2K_HASH:
1190 case DF4p5_NPS1_10CHAN_2K_HASH:
1191 case DF4p5_NPS2_5CHAN_2K_HASH:
1192 denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(10, 8), ctx->ret_addr) << 9;
1193 denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 11), ctx->ret_addr);
1194 break;
1195
1196 default:
1197 atl_debug_on_bad_intlv_mode(ctx);
1198 return -EINVAL;
1199 }
1200
1201 if (ctx->map.num_intlv_chan % 3 == 0)
1202 denorm_ctx->mod_value = 3;
1203 else
1204 denorm_ctx->mod_value = 5;
1205
1206 denorm_ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx) - get_dst_fabric_id(ctx);
1207
1208 atl_debug(ctx, "Initialized df4p5_denorm_ctx:");
1209 atl_debug(ctx, " mod_value = %d", denorm_ctx->mod_value);
1210 atl_debug(ctx, " perm_shift = %d", denorm_ctx->perm_shift);
1211 atl_debug(ctx, " rehash_vector = 0x%x", denorm_ctx->rehash_vector);
1212 atl_debug(ctx, " base_denorm_addr = 0x%016llx", denorm_ctx->base_denorm_addr);
1213 atl_debug(ctx, " div_addr = 0x%016llx", denorm_ctx->div_addr);
1214 atl_debug(ctx, " coh_st_fabric_id = 0x%x", denorm_ctx->coh_st_fabric_id);
1215
1216 return 0;
1217 }
1218
1219 /*
1220 * For DF 4.5, parts of the physical address can be directly pulled from the
1221 * normalized address. The exact bits will differ between interleave modes, but
1222 * using NPS0_24CHAN_1K_HASH as an example, the normalized address consists of
1223 * bits [63:13] (divided by 3), bits [11:10], and bits [7:0] of the system
1224 * physical address.
1225 *
1226 * In this case, there is no way to reconstruct the missing bits (bits 8, 9,
1227 * and 12) from the normalized address. Additionally, when bits [63:13] are
1228 * divided by 3, the remainder is dropped. Determine the proper combination of
1229 * "lost" bits and dropped remainder by iterating through each possible
1230 * permutation of these bits and then normalizing the generated system physical
1231 * addresses. If the normalized address matches the address we are trying to
1232 * translate, then we have found the correct permutation of bits.
1233 */
denorm_addr_df4p5_np2(struct addr_ctx * ctx)1234 static int denorm_addr_df4p5_np2(struct addr_ctx *ctx)
1235 {
1236 struct df4p5_denorm_ctx denorm_ctx;
1237 int ret = 0;
1238
1239 memset(&denorm_ctx, 0, sizeof(denorm_ctx));
1240
1241 atl_debug(ctx, "Denormalizing DF 4.5 normalized address 0x%016llx", ctx->ret_addr);
1242
1243 ret = init_df4p5_denorm_ctx(ctx, &denorm_ctx);
1244 if (ret)
1245 return ret;
1246
1247 return check_permutations(ctx, &denorm_ctx);
1248 }
1249
denormalize_address(struct addr_ctx * ctx)1250 int denormalize_address(struct addr_ctx *ctx)
1251 {
1252 switch (ctx->map.intlv_mode) {
1253 case NONE:
1254 return 0;
1255 case DF4_NPS4_3CHAN_HASH:
1256 case DF4_NPS2_6CHAN_HASH:
1257 case DF4_NPS1_12CHAN_HASH:
1258 case DF4_NPS2_5CHAN_HASH:
1259 case DF4_NPS1_10CHAN_HASH:
1260 return denorm_addr_df4_np2(ctx);
1261 case DF4p5_NPS0_24CHAN_1K_HASH:
1262 case DF4p5_NPS4_3CHAN_1K_HASH:
1263 case DF4p5_NPS2_6CHAN_1K_HASH:
1264 case DF4p5_NPS1_12CHAN_1K_HASH:
1265 case DF4p5_NPS2_5CHAN_1K_HASH:
1266 case DF4p5_NPS1_10CHAN_1K_HASH:
1267 case DF4p5_NPS4_3CHAN_2K_HASH:
1268 case DF4p5_NPS2_6CHAN_2K_HASH:
1269 case DF4p5_NPS1_12CHAN_2K_HASH:
1270 case DF4p5_NPS0_24CHAN_2K_HASH:
1271 case DF4p5_NPS2_5CHAN_2K_HASH:
1272 case DF4p5_NPS1_10CHAN_2K_HASH:
1273 return denorm_addr_df4p5_np2(ctx);
1274 case DF3_6CHAN:
1275 return denorm_addr_df3_6chan(ctx);
1276 default:
1277 return denorm_addr_common(ctx);
1278 }
1279 }
1280