xref: /linux/drivers/ras/amd/atl/denormalize.c (revision ed7171ff9fabc49ae6ed42fbd082a576473836fc)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * AMD Address Translation Library
4  *
5  * denormalize.c : Functions to account for interleaving bits
6  *
7  * Copyright (c) 2023, Advanced Micro Devices, Inc.
8  * All Rights Reserved.
9  *
10  * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
11  */
12 
13 #include "internal.h"
14 
15 /*
16  * Returns the Destination Fabric ID. This is the first (lowest)
17  * COH_ST Fabric ID used within a DRAM Address map.
18  */
19 static u16 get_dst_fabric_id(struct addr_ctx *ctx)
20 {
21 	switch (df_cfg.rev) {
22 	case DF2:	return FIELD_GET(DF2_DST_FABRIC_ID,	ctx->map.limit);
23 	case DF3:	return FIELD_GET(DF3_DST_FABRIC_ID,	ctx->map.limit);
24 	case DF3p5:	return FIELD_GET(DF3p5_DST_FABRIC_ID,	ctx->map.limit);
25 	case DF4:	return FIELD_GET(DF4_DST_FABRIC_ID,	ctx->map.ctl);
26 	case DF4p5:	return FIELD_GET(DF4p5_DST_FABRIC_ID,	ctx->map.ctl);
27 	default:
28 			atl_debug_on_bad_df_rev();
29 			return 0;
30 	}
31 }
32 
33 /*
34  * Make a contiguous gap in address for N bits starting at bit P.
35  *
36  * Example:
37  * address bits:		[20:0]
38  * # of interleave bits    (n):	3
39  * starting interleave bit (p):	8
40  *
41  * expanded address bits:	[20+n : n+p][n+p-1 : p][p-1 : 0]
42  *				[23   :  11][10    : 8][7   : 0]
43  */
44 static u64 make_space_for_coh_st_id_at_intlv_bit(struct addr_ctx *ctx)
45 {
46 	return expand_bits(ctx->map.intlv_bit_pos,
47 			   ctx->map.total_intlv_bits,
48 			   ctx->ret_addr);
49 }
50 
51 /*
52  * Make two gaps in address for N bits.
53  * First gap is a single bit at bit P.
54  * Second gap is the remaining N-1 bits at bit 12.
55  *
56  * Example:
57  * address bits:		[20:0]
58  * # of interleave bits    (n):	3
59  * starting interleave bit (p):	8
60  *
61  * First gap
62  * expanded address bits:	[20+1 : p+1][p][p-1 : 0]
63  *				[21   :   9][8][7   : 0]
64  *
65  * Second gap uses result from first.
66  *				r = n - 1; remaining interleave bits
67  * expanded address bits:	[21+r : 12+r][12+r-1: 12][11 : 0]
68  *				[23   :   14][13    : 12][11 : 0]
69  */
70 static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx)
71 {
72 	/* Make a single space at the interleave bit. */
73 	u64 denorm_addr = expand_bits(ctx->map.intlv_bit_pos, 1, ctx->ret_addr);
74 
75 	/* Done if there's only a single interleave bit. */
76 	if (ctx->map.total_intlv_bits <= 1)
77 		return denorm_addr;
78 
79 	/* Make spaces for the remaining interleave bits starting at bit 12. */
80 	return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr);
81 }
82 
83 /*
84  * Make space for CS ID at bits [14:8] as follows:
85  *
86  * 8 channels	-> bits [10:8]
87  * 16 channels	-> bits [11:8]
88  * 32 channels	-> bits [14,11:8]
89  *
90  * 1 die	-> N/A
91  * 2 dies	-> bit  [12]
92  * 4 dies	-> bits [13:12]
93  */
94 static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx)
95 {
96 	u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
97 	u64 denorm_addr;
98 
99 	if (ctx->map.intlv_bit_pos != 8) {
100 		pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos);
101 		return ~0ULL;
102 	}
103 
104 	/* Channel bits. Covers up to 4 bits at [11:8]. */
105 	denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr);
106 
107 	/* Die bits. Always starts at [12]. */
108 	denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr);
109 
110 	/* Additional channel bit at [14]. */
111 	if (num_intlv_bits > 4)
112 		denorm_addr = expand_bits(14, 1, denorm_addr);
113 
114 	return denorm_addr;
115 }
116 
117 /*
118  * Take the current calculated address and shift enough bits in the middle
119  * to make a gap where the interleave bits will be inserted.
120  */
121 static u64 make_space_for_coh_st_id(struct addr_ctx *ctx)
122 {
123 	switch (ctx->map.intlv_mode) {
124 	case NOHASH_2CHAN:
125 	case NOHASH_4CHAN:
126 	case NOHASH_8CHAN:
127 	case NOHASH_16CHAN:
128 	case NOHASH_32CHAN:
129 	case DF2_2CHAN_HASH:
130 		return make_space_for_coh_st_id_at_intlv_bit(ctx);
131 
132 	case DF3_COD4_2CHAN_HASH:
133 	case DF3_COD2_4CHAN_HASH:
134 	case DF3_COD1_8CHAN_HASH:
135 	case DF4_NPS4_2CHAN_HASH:
136 	case DF4_NPS2_4CHAN_HASH:
137 	case DF4_NPS1_8CHAN_HASH:
138 	case DF4p5_NPS4_2CHAN_1K_HASH:
139 	case DF4p5_NPS4_2CHAN_2K_HASH:
140 	case DF4p5_NPS2_4CHAN_2K_HASH:
141 	case DF4p5_NPS1_8CHAN_2K_HASH:
142 	case DF4p5_NPS1_16CHAN_2K_HASH:
143 		return make_space_for_coh_st_id_split_2_1(ctx);
144 
145 	case MI3_HASH_8CHAN:
146 	case MI3_HASH_16CHAN:
147 	case MI3_HASH_32CHAN:
148 		return make_space_for_coh_st_id_mi300(ctx);
149 
150 	default:
151 		atl_debug_on_bad_intlv_mode(ctx);
152 		return ~0ULL;
153 	}
154 }
155 
156 static u16 get_coh_st_id_df2(struct addr_ctx *ctx)
157 {
158 	u8 num_socket_intlv_bits = ilog2(ctx->map.num_intlv_sockets);
159 	u8 num_die_intlv_bits = ilog2(ctx->map.num_intlv_dies);
160 	u8 num_intlv_bits;
161 	u16 coh_st_id, mask;
162 
163 	coh_st_id = ctx->coh_st_fabric_id - get_dst_fabric_id(ctx);
164 
165 	/* Channel interleave bits */
166 	num_intlv_bits = order_base_2(ctx->map.num_intlv_chan);
167 	mask = GENMASK(num_intlv_bits - 1, 0);
168 	coh_st_id &= mask;
169 
170 	/* Die interleave bits */
171 	if (num_die_intlv_bits) {
172 		u16 die_bits;
173 
174 		mask = GENMASK(num_die_intlv_bits - 1, 0);
175 		die_bits = ctx->coh_st_fabric_id & df_cfg.die_id_mask;
176 		die_bits >>= df_cfg.die_id_shift;
177 
178 		coh_st_id |= (die_bits & mask) << num_intlv_bits;
179 		num_intlv_bits += num_die_intlv_bits;
180 	}
181 
182 	/* Socket interleave bits */
183 	if (num_socket_intlv_bits) {
184 		u16 socket_bits;
185 
186 		mask = GENMASK(num_socket_intlv_bits - 1, 0);
187 		socket_bits = ctx->coh_st_fabric_id & df_cfg.socket_id_mask;
188 		socket_bits >>= df_cfg.socket_id_shift;
189 
190 		coh_st_id |= (socket_bits & mask) << num_intlv_bits;
191 	}
192 
193 	return coh_st_id;
194 }
195 
196 static u16 get_coh_st_id_df4(struct addr_ctx *ctx)
197 {
198 	/*
199 	 * Start with the original component mask and the number of interleave
200 	 * bits for the channels in this map.
201 	 */
202 	u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
203 	u16 mask = df_cfg.component_id_mask;
204 
205 	u16 socket_bits;
206 
207 	/* Set the derived Coherent Station ID to the input Coherent Station Fabric ID. */
208 	u16 coh_st_id = ctx->coh_st_fabric_id & mask;
209 
210 	/*
211 	 * Subtract the "base" Destination Fabric ID.
212 	 * This accounts for systems with disabled Coherent Stations.
213 	 */
214 	coh_st_id -= get_dst_fabric_id(ctx) & mask;
215 
216 	/*
217 	 * Generate and use a new mask based on the number of bits
218 	 * needed for channel interleaving in this map.
219 	 */
220 	mask = GENMASK(num_intlv_bits - 1, 0);
221 	coh_st_id &= mask;
222 
223 	/* Done if socket interleaving is not enabled. */
224 	if (ctx->map.num_intlv_sockets <= 1)
225 		return coh_st_id;
226 
227 	/*
228 	 * Figure out how many bits are needed for the number of
229 	 * interleaved sockets. And shift the derived Coherent Station ID to account
230 	 * for these.
231 	 */
232 	num_intlv_bits = ilog2(ctx->map.num_intlv_sockets);
233 	coh_st_id <<= num_intlv_bits;
234 
235 	/* Generate a new mask for the socket interleaving bits. */
236 	mask = GENMASK(num_intlv_bits - 1, 0);
237 
238 	/* Get the socket interleave bits from the original Coherent Station Fabric ID. */
239 	socket_bits = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask) >> df_cfg.socket_id_shift;
240 
241 	/* Apply the appropriate socket bits to the derived Coherent Station ID. */
242 	coh_st_id |= socket_bits & mask;
243 
244 	return coh_st_id;
245 }
246 
247 /*
248  * MI300 hash has:
249  * (C)hannel[3:0]	= coh_st_id[3:0]
250  * (S)tack[0]		= coh_st_id[4]
251  * (D)ie[1:0]		= coh_st_id[6:5]
252  *
253  * Hashed coh_st_id is swizzled so that Stack bit is at the end.
254  * coh_st_id = SDDCCCC
255  */
256 static u16 get_coh_st_id_mi300(struct addr_ctx *ctx)
257 {
258 	u8 channel_bits, die_bits, stack_bit;
259 	u16 die_id;
260 
261 	/* Subtract the "base" Destination Fabric ID. */
262 	ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx);
263 
264 	die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift;
265 
266 	channel_bits	= FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id);
267 	stack_bit	= FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6;
268 	die_bits	= die_id << 4;
269 
270 	return stack_bit | die_bits | channel_bits;
271 }
272 
273 /*
274  * Derive the correct Coherent Station ID that represents the interleave bits
275  * used within the system physical address. This accounts for the
276  * interleave mode, number of interleaved channels/dies/sockets, and
277  * other system/mode-specific bit swizzling.
278  *
279  * Returns:	Coherent Station ID on success.
280  *		All bits set on error.
281  */
282 static u16 calculate_coh_st_id(struct addr_ctx *ctx)
283 {
284 	switch (ctx->map.intlv_mode) {
285 	case NOHASH_2CHAN:
286 	case NOHASH_4CHAN:
287 	case NOHASH_8CHAN:
288 	case NOHASH_16CHAN:
289 	case NOHASH_32CHAN:
290 	case DF3_COD4_2CHAN_HASH:
291 	case DF3_COD2_4CHAN_HASH:
292 	case DF3_COD1_8CHAN_HASH:
293 	case DF2_2CHAN_HASH:
294 		return get_coh_st_id_df2(ctx);
295 
296 	case DF4_NPS4_2CHAN_HASH:
297 	case DF4_NPS2_4CHAN_HASH:
298 	case DF4_NPS1_8CHAN_HASH:
299 	case DF4p5_NPS4_2CHAN_1K_HASH:
300 	case DF4p5_NPS4_2CHAN_2K_HASH:
301 	case DF4p5_NPS2_4CHAN_2K_HASH:
302 	case DF4p5_NPS1_8CHAN_2K_HASH:
303 	case DF4p5_NPS1_16CHAN_2K_HASH:
304 		return get_coh_st_id_df4(ctx);
305 
306 	case MI3_HASH_8CHAN:
307 	case MI3_HASH_16CHAN:
308 	case MI3_HASH_32CHAN:
309 		return get_coh_st_id_mi300(ctx);
310 
311 	/* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */
312 	case DF4p5_NPS2_4CHAN_1K_HASH:
313 	case DF4p5_NPS1_8CHAN_1K_HASH:
314 	case DF4p5_NPS1_16CHAN_1K_HASH:
315 		return ctx->coh_st_fabric_id - get_dst_fabric_id(ctx);
316 
317 	default:
318 		atl_debug_on_bad_intlv_mode(ctx);
319 		return ~0;
320 	}
321 }
322 
323 static u64 insert_coh_st_id_at_intlv_bit(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
324 {
325 	return denorm_addr | (coh_st_id << ctx->map.intlv_bit_pos);
326 }
327 
328 static u64 insert_coh_st_id_split_2_1(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
329 {
330 	/* Insert coh_st_id[0] at the interleave bit. */
331 	denorm_addr |= (coh_st_id & BIT(0)) << ctx->map.intlv_bit_pos;
332 
333 	/* Insert coh_st_id[2:1] at bit 12. */
334 	denorm_addr |= (coh_st_id & GENMASK(2, 1)) << 11;
335 
336 	return denorm_addr;
337 }
338 
339 static u64 insert_coh_st_id_split_2_2(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
340 {
341 	/* Insert coh_st_id[1:0] at bit 8. */
342 	denorm_addr |= (coh_st_id & GENMASK(1, 0)) << 8;
343 
344 	/*
345 	 * Insert coh_st_id[n:2] at bit 12. 'n' could be 2 or 3.
346 	 * Grab both because bit 3 will be clear if unused.
347 	 */
348 	denorm_addr |= (coh_st_id & GENMASK(3, 2)) << 10;
349 
350 	return denorm_addr;
351 }
352 
353 static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
354 {
355 	switch (ctx->map.intlv_mode) {
356 	case NOHASH_2CHAN:
357 	case NOHASH_4CHAN:
358 	case NOHASH_8CHAN:
359 	case NOHASH_16CHAN:
360 	case NOHASH_32CHAN:
361 	case MI3_HASH_8CHAN:
362 	case MI3_HASH_16CHAN:
363 	case MI3_HASH_32CHAN:
364 	case DF2_2CHAN_HASH:
365 		return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id);
366 
367 	case DF3_COD4_2CHAN_HASH:
368 	case DF3_COD2_4CHAN_HASH:
369 	case DF3_COD1_8CHAN_HASH:
370 	case DF4_NPS4_2CHAN_HASH:
371 	case DF4_NPS2_4CHAN_HASH:
372 	case DF4_NPS1_8CHAN_HASH:
373 	case DF4p5_NPS4_2CHAN_1K_HASH:
374 	case DF4p5_NPS4_2CHAN_2K_HASH:
375 	case DF4p5_NPS2_4CHAN_2K_HASH:
376 	case DF4p5_NPS1_8CHAN_2K_HASH:
377 	case DF4p5_NPS1_16CHAN_2K_HASH:
378 		return insert_coh_st_id_split_2_1(ctx, denorm_addr, coh_st_id);
379 
380 	case DF4p5_NPS2_4CHAN_1K_HASH:
381 	case DF4p5_NPS1_8CHAN_1K_HASH:
382 	case DF4p5_NPS1_16CHAN_1K_HASH:
383 		return insert_coh_st_id_split_2_2(ctx, denorm_addr, coh_st_id);
384 
385 	default:
386 		atl_debug_on_bad_intlv_mode(ctx);
387 		return ~0ULL;
388 	}
389 }
390 
/*
 * MI300 systems have a fixed, hardware-defined physical-to-logical
 * Coherent Station mapping. The Remap registers are not used.
 *
 * The table is indexed by the physical instance ID and each entry holds the
 * matching logical Coherent Station ID. There are 32 entries, written as
 * rows of four: within each half of the table (indices 0-15 and 16-31) the
 * rows appear in reverse order, while the order inside a row is kept.
 */
static const u16 phy_to_log_coh_st_map_mi300[] = {
	12, 13, 14, 15,
	 8,  9, 10, 11,
	 4,  5,  6,  7,
	 0,  1,  2,  3,
	28, 29, 30, 31,
	24, 25, 26, 27,
	20, 21, 22, 23,
	16, 17, 18, 19,
};
405 
406 static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx)
407 {
408 	if (ctx->inst_id >= ARRAY_SIZE(phy_to_log_coh_st_map_mi300)) {
409 		atl_debug(ctx, "Instance ID out of range");
410 		return ~0;
411 	}
412 
413 	return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift);
414 }
415 
416 static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
417 {
418 	u16 component_id, log_fabric_id;
419 
420 	/* Start with the physical COH_ST Fabric ID. */
421 	u16 phys_fabric_id = ctx->coh_st_fabric_id;
422 
423 	if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
424 		return get_logical_coh_st_fabric_id_mi300(ctx);
425 
426 	/* Skip logical ID lookup if remapping is disabled. */
427 	if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) &&
428 	    ctx->map.intlv_mode != DF3_6CHAN)
429 		return phys_fabric_id;
430 
431 	/* Mask off the Node ID bits to get the "local" Component ID. */
432 	component_id = phys_fabric_id & df_cfg.component_id_mask;
433 
434 	/*
435 	 * Search the list of logical Component IDs for the one that
436 	 * matches this physical Component ID.
437 	 */
438 	for (log_fabric_id = 0; log_fabric_id < MAX_COH_ST_CHANNELS; log_fabric_id++) {
439 		if (ctx->map.remap_array[log_fabric_id] == component_id)
440 			break;
441 	}
442 
443 	if (log_fabric_id == MAX_COH_ST_CHANNELS)
444 		atl_debug(ctx, "COH_ST remap entry not found for 0x%x",
445 			  log_fabric_id);
446 
447 	/* Get the Node ID bits from the physical and apply to the logical. */
448 	return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id;
449 }
450 
/*
 * Compute the logical Coherent Station Fabric ID that corresponds to
 * denorm_ctx->current_spa for the DF4.5 non-power-of-2 interleave modes.
 *
 * Four hash bits (for address bits 8, 9, 12 and 13) are first derived by
 * XOR-ing selected bits of the current SPA, with most terms gated by the
 * hash control bits read from ctx->map.ctl. The ID is then assembled per
 * interleave mode from a modulo of the upper address bits
 * (denorm_ctx->mod_value) plus some of the hash bits.
 *
 * Returns the ID, or 0 if the interleave mode is not handled here or if the
 * computed value does not fit in 16 bits. Since 0 can also be a legitimate
 * ID, the return value alone does not identify an error.
 */
static u16 get_logical_coh_st_fabric_id_for_current_spa(struct addr_ctx *ctx,
							struct df4p5_denorm_ctx *denorm_ctx)
{
	bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T;
	bool hash_pa8, hash_pa9, hash_pa12, hash_pa13;
	u64 cs_id = 0;

	/* Hash enables for this map; each one gates a term in the XORs below. */
	hash_ctl_64k	= FIELD_GET(DF4_HASH_CTL_64K,  ctx->map.ctl);
	hash_ctl_2M	= FIELD_GET(DF4_HASH_CTL_2M,   ctx->map.ctl);
	hash_ctl_1G	= FIELD_GET(DF4_HASH_CTL_1G,   ctx->map.ctl);
	hash_ctl_1T	= FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);

	/* Bit 8 hash: bit 14 is always included, the other terms are gated. */
	hash_pa8  = FIELD_GET(BIT_ULL(8),  denorm_ctx->current_spa);
	hash_pa8 ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
	hash_pa8 ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
	hash_pa8 ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
	hash_pa8 ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
	hash_pa8 ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;

	/* Bit 9 hash. */
	hash_pa9  = FIELD_GET(BIT_ULL(9),  denorm_ctx->current_spa);
	hash_pa9 ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
	hash_pa9 ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
	hash_pa9 ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
	hash_pa9 ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;

	/* Bit 12 hash. */
	hash_pa12  = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
	hash_pa12 ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
	hash_pa12 ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
	hash_pa12 ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
	hash_pa12 ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;

	/* Bit 13 hash. */
	hash_pa13  = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
	hash_pa13 ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
	hash_pa13 ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
	hash_pa13 ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
	hash_pa13 ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;

	/*
	 * In every case the modulo is taken before the final shift and before
	 * the hash bits are OR-ed in, so the statement order matters.
	 */
	switch (ctx->map.intlv_mode) {
	case DF4p5_NPS0_24CHAN_1K_HASH:
		cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
		cs_id %= denorm_ctx->mod_value;
		cs_id <<= 2;
		cs_id |= (hash_pa9 | (hash_pa12 << 1));
		/* The bit 8 hash is placed at the Socket ID position. */
		cs_id |= hash_pa8 << df_cfg.socket_id_shift;
		break;

	case DF4p5_NPS0_24CHAN_2K_HASH:
		cs_id = FIELD_GET(GENMASK_ULL(63, 14), denorm_ctx->current_spa) << 4;
		cs_id %= denorm_ctx->mod_value;
		cs_id <<= 2;
		cs_id |= (hash_pa12 | (hash_pa13 << 1));
		/* The bit 8 hash is placed at the Socket ID position. */
		cs_id |= hash_pa8 << df_cfg.socket_id_shift;
		break;

	case DF4p5_NPS1_12CHAN_1K_HASH:
		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
		cs_id %= denorm_ctx->mod_value;
		cs_id <<= 2;
		cs_id |= (hash_pa8 | (hash_pa9 << 1));
		break;

	case DF4p5_NPS1_12CHAN_2K_HASH:
		cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
		cs_id %= denorm_ctx->mod_value;
		cs_id <<= 2;
		cs_id |= (hash_pa8 | (hash_pa12 << 1));
		break;

	case DF4p5_NPS2_6CHAN_1K_HASH:
	case DF4p5_NPS1_10CHAN_1K_HASH:
		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
		/* The raw (unhashed) address bit 9 takes part in the modulo. */
		cs_id |= (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) << 1);
		cs_id %= denorm_ctx->mod_value;
		cs_id <<= 1;
		cs_id |= hash_pa8;
		break;

	case DF4p5_NPS2_6CHAN_2K_HASH:
	case DF4p5_NPS1_10CHAN_2K_HASH:
		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
		cs_id %= denorm_ctx->mod_value;
		cs_id <<= 1;
		cs_id |= hash_pa8;
		break;

	case DF4p5_NPS4_3CHAN_1K_HASH:
	case DF4p5_NPS2_5CHAN_1K_HASH:
		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
		/* The raw address bits [9:8] take part in the modulo; no hash bits are used. */
		cs_id |= FIELD_GET(GENMASK_ULL(9, 8), denorm_ctx->current_spa);
		cs_id %= denorm_ctx->mod_value;
		break;

	case DF4p5_NPS4_3CHAN_2K_HASH:
	case DF4p5_NPS2_5CHAN_2K_HASH:
		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
		/* The raw address bit 8 takes part in the modulo; no hash bits are used. */
		cs_id |= FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) << 1;
		cs_id %= denorm_ctx->mod_value;
		break;

	default:
		atl_debug_on_bad_intlv_mode(ctx);
		return 0;
	}

	/* The result is returned as a u16, so anything wider is an error. */
	if (cs_id > 0xffff) {
		atl_debug(ctx, "Translation error: Resulting cs_id larger than u16\n");
		return 0;
	}

	return cs_id;
}
562 
563 static int denorm_addr_common(struct addr_ctx *ctx)
564 {
565 	u64 denorm_addr;
566 	u16 coh_st_id;
567 
568 	/*
569 	 * Convert the original physical COH_ST Fabric ID to a logical value.
570 	 * This is required for non-power-of-two and other interleaving modes.
571 	 */
572 	ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx);
573 
574 	denorm_addr = make_space_for_coh_st_id(ctx);
575 	coh_st_id = calculate_coh_st_id(ctx);
576 	ctx->ret_addr = insert_coh_st_id(ctx, denorm_addr, coh_st_id);
577 	return 0;
578 }
579 
/*
 * Denormalize ctx->ret_addr for the DF3 6-channel interleave mode.
 *
 * The address is split at the interleave bit, the upper part is moved up by
 * the total number of interleave bits, and the Coherent Station ID is placed
 * in the gap. When the two most-significant interleave bits of the input are
 * 11b, the upper part and the ID are adjusted first using a mod3 calculation.
 *
 * Returns 0 on success, -EINVAL if the map is not in DF3_6CHAN mode.
 */
static int denorm_addr_df3_6chan(struct addr_ctx *ctx)
{
	u16 coh_st_id = ctx->coh_st_fabric_id & df_cfg.component_id_mask;
	u8 total_intlv_bits = ctx->map.total_intlv_bits;
	u8 low_bit, intlv_bit = ctx->map.intlv_bit_pos;
	u64 msb_intlv_bits, temp_addr_a, temp_addr_b;
	u8 np2_bits = ctx->map.np2_bits;

	if (ctx->map.intlv_mode != DF3_6CHAN)
		return -EINVAL;

	/*
	 * 'np2_bits' holds the number of bits needed to cover the
	 * amount of memory (rounded up) in this map using 64K chunks.
	 *
	 * Example:
	 * Total memory in map:			6GB
	 * Rounded up to next power-of-2:	8GB
	 * Number of 64K chunks:		0x20000
	 * np2_bits = log2(# of chunks):	17
	 *
	 * Get the two most-significant interleave bits from the
	 * input address based on the following:
	 *
	 * [15 + np2_bits - total_intlv_bits : 14 + np2_bits - total_intlv_bits]
	 */
	low_bit = 14 + np2_bits - total_intlv_bits;
	msb_intlv_bits = ctx->ret_addr >> low_bit;
	msb_intlv_bits &= 0x3;

	/*
	 * If MSB are 11b, then logical COH_ST ID is 6 or 7.
	 * Need to adjust based on the mod3 result.
	 */
	if (msb_intlv_bits == 3) {
		u8 addr_mod, phys_addr_msb, msb_coh_st_id;

		/* Get the remaining interleave bits from the input address. */
		temp_addr_b = GENMASK_ULL(low_bit - 1, intlv_bit) & ctx->ret_addr;
		temp_addr_b >>= intlv_bit;

		/* Calculate the logical COH_ST offset based on mod3. */
		addr_mod = temp_addr_b % 3;

		/* Get COH_ST ID bits [2:1]. */
		msb_coh_st_id = (coh_st_id >> 1) & 0x3;

		/*
		 * Get the bit that starts the physical address bits.
		 * The multiplier is 1 or 2, chosen by the parity of
		 * (intlv_bit + np2_bits + 1); the product is then reduced mod 3.
		 */
		phys_addr_msb = (intlv_bit + np2_bits + 1);
		phys_addr_msb &= BIT(0);
		phys_addr_msb++;
		phys_addr_msb *= 3 - addr_mod + msb_coh_st_id;
		phys_addr_msb %= 3;

		/* Move the physical address MSB to the correct place. */
		temp_addr_b |= phys_addr_msb << (low_bit - total_intlv_bits - intlv_bit);

		/* Generate a new COH_ST ID as follows: coh_st_id = [1, 1, coh_st_id[0]] */
		coh_st_id &= BIT(0);
		coh_st_id |= GENMASK(2, 1);
	} else {
		/* No adjustment needed: take everything from the interleave bit upwards. */
		temp_addr_b = GENMASK_ULL(63, intlv_bit) & ctx->ret_addr;
		temp_addr_b >>= intlv_bit;
	}

	/* Low part: the bits below the interleave bit stay where they are. */
	temp_addr_a = GENMASK_ULL(intlv_bit - 1, 0) & ctx->ret_addr;
	/* High part: moved up to leave room for the interleave bits. */
	temp_addr_b <<= intlv_bit + total_intlv_bits;

	ctx->ret_addr = temp_addr_a | temp_addr_b;
	/* Fill the gap with the (possibly adjusted) Coherent Station ID. */
	ctx->ret_addr |= coh_st_id << intlv_bit;
	return 0;
}
652 
/*
 * Denormalize ctx->ret_addr for the DF4 non-power-of-2 hashed interleave
 * modes (3, 5, 6, 10 and 12 channel).
 *
 * The result is assembled from a low part (bits [13:0], with gaps opened for
 * the hashed bits) and a high part (the upper input bits multiplied by the
 * mod value, plus the Coherent Station's offset within its group). The
 * hashed bits 8, 13 and 12 are then computed here and OR-ed in, as needed
 * by the mode.
 *
 * Returns 0 on success, -EINVAL for an interleave mode not handled here.
 */
static int denorm_addr_df4_np2(struct addr_ctx *ctx)
{
	bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
	u16 group, group_offset, log_coh_st_offset;
	unsigned int mod_value, shift_value;
	u16 mask = df_cfg.component_id_mask;
	u64 temp_addr_a, temp_addr_b;
	bool hash_pa8, hashed_bit;

	/* Per-mode modulus and the position of the first "high" input bit. */
	switch (ctx->map.intlv_mode) {
	case DF4_NPS4_3CHAN_HASH:
		mod_value	= 3;
		shift_value	= 13;
		break;
	case DF4_NPS2_6CHAN_HASH:
		mod_value	= 3;
		shift_value	= 12;
		break;
	case DF4_NPS1_12CHAN_HASH:
		mod_value	= 3;
		shift_value	= 11;
		break;
	case DF4_NPS2_5CHAN_HASH:
		mod_value	= 5;
		shift_value	= 13;
		break;
	case DF4_NPS1_10CHAN_HASH:
		mod_value	= 5;
		shift_value	= 12;
		break;
	default:
		atl_debug_on_bad_intlv_mode(ctx);
		return -EINVAL;
	};

	/*
	 * Without socket interleaving, the pre-hash bit 8 value is taken from
	 * the input address at 'shift_value' and that bit is removed from the
	 * low part. With socket interleaving, it is derived from the Socket ID
	 * bits of the Fabric ID and the input address is used as-is.
	 */
	if (ctx->map.num_intlv_sockets == 1) {
		hash_pa8	= BIT_ULL(shift_value) & ctx->ret_addr;
		temp_addr_a	= remove_bits(shift_value, shift_value, ctx->ret_addr);
	} else {
		hash_pa8	= ctx->coh_st_fabric_id & df_cfg.socket_id_mask;
		temp_addr_a	= ctx->ret_addr;
	}

	/* Make a gap for the real bit [8]. */
	temp_addr_a = expand_bits(8, 1, temp_addr_a);

	/* Make an additional gap for bits [13:12], as appropriate.*/
	if (ctx->map.intlv_mode == DF4_NPS2_6CHAN_HASH ||
	    ctx->map.intlv_mode == DF4_NPS1_10CHAN_HASH) {
		temp_addr_a = expand_bits(13, 1, temp_addr_a);
	} else if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) {
		temp_addr_a = expand_bits(12, 2, temp_addr_a);
	}

	/* Keep bits [13:0]. */
	temp_addr_a &= GENMASK_ULL(13, 0);

	/*
	 * Get the appropriate high bits.
	 * The starting bit is one higher than 'shift_value', less one bit per
	 * doubling of the number of interleaved sockets.
	 */
	shift_value += 1 - ilog2(ctx->map.num_intlv_sockets);
	temp_addr_b = GENMASK_ULL(63, shift_value) & ctx->ret_addr;
	temp_addr_b >>= shift_value;
	temp_addr_b *= mod_value;

	/*
	 * Coherent Stations are divided into groups.
	 *
	 * Multiples of 3 (mod3) are divided into quadrants.
	 * e.g. NP4_3CHAN ->	[0, 1, 2] [6, 7, 8]
	 *			[3, 4, 5] [9, 10, 11]
	 *
	 * Multiples of 5 (mod5) are divided into sides.
	 * e.g. NP2_5CHAN ->	[0, 1, 2, 3, 4] [5, 6, 7, 8, 9]
	 */

	/*
	 * Calculate the logical offset for the COH_ST within its DRAM Address map.
	 * e.g. if map includes [5, 6, 7, 8, 9] and target instance is '8', then
	 *	 log_coh_st_offset = 8 - 5 = 3
	 */
	log_coh_st_offset = (ctx->coh_st_fabric_id & mask) - (get_dst_fabric_id(ctx) & mask);

	/*
	 * Figure out the group number.
	 *
	 * Following above example,
	 * log_coh_st_offset = 3
	 * mod_value = 5
	 * group = 3 / 5 = 0
	 */
	group = log_coh_st_offset / mod_value;

	/*
	 * Figure out the offset within the group.
	 *
	 * Following above example,
	 * log_coh_st_offset = 3
	 * mod_value = 5
	 * group_offset = 3 % 5 = 3
	 */
	group_offset = log_coh_st_offset % mod_value;

	/* Adjust group_offset if the hashed bit [8] is set (decrement, wrapping within the group). */
	if (hash_pa8) {
		if (!group_offset)
			group_offset = mod_value - 1;
		else
			group_offset--;
	}

	/* Add in the group offset to the high bits. */
	temp_addr_b += group_offset;

	/* Shift the high bits to the proper starting position. */
	temp_addr_b <<= 14;

	/* Combine the high and low bits together. */
	ctx->ret_addr = temp_addr_a | temp_addr_b;

	/* Account for hashing here instead of in dehash_address(). */
	hash_ctl_64k	= FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
	hash_ctl_2M	= FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
	hash_ctl_1G	= FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);

	/* Bit 8: bit 14 is always part of the hash; the other terms are gated. */
	hashed_bit = !!hash_pa8;
	hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr);
	hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k;
	hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M;
	hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G;

	ctx->ret_addr |= hashed_bit << 8;

	/* Done for 3 and 5 channel. */
	if (ctx->map.intlv_mode == DF4_NPS4_3CHAN_HASH ||
	    ctx->map.intlv_mode == DF4_NPS2_5CHAN_HASH)
		return 0;

	/* Select the proper 'group' bit to use for Bit 13. */
	if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH)
		hashed_bit = !!(group & BIT(1));
	else
		hashed_bit = group & BIT(0);

	hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k;
	hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M;
	hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G;

	ctx->ret_addr |= hashed_bit << 13;

	/* Done for 6 and 10 channel. */
	if (ctx->map.intlv_mode != DF4_NPS1_12CHAN_HASH)
		return 0;

	/* 12 channel only: bit 12 is hashed from the low 'group' bit. */
	hashed_bit = group & BIT(0);
	hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k;
	hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M;
	hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G;

	ctx->ret_addr |= hashed_bit << 12;
	return 0;
}
813 
814 static u64 normalize_addr_df4p5_np2(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx,
815 				    u64 addr)
816 {
817 	u64 temp_addr_a = 0, temp_addr_b = 0;
818 
819 	switch (ctx->map.intlv_mode) {
820 	case DF4p5_NPS0_24CHAN_1K_HASH:
821 	case DF4p5_NPS1_12CHAN_1K_HASH:
822 	case DF4p5_NPS2_6CHAN_1K_HASH:
823 	case DF4p5_NPS4_3CHAN_1K_HASH:
824 	case DF4p5_NPS1_10CHAN_1K_HASH:
825 	case DF4p5_NPS2_5CHAN_1K_HASH:
826 		temp_addr_a = FIELD_GET(GENMASK_ULL(11, 10), addr) << 8;
827 		break;
828 
829 	case DF4p5_NPS0_24CHAN_2K_HASH:
830 	case DF4p5_NPS1_12CHAN_2K_HASH:
831 	case DF4p5_NPS2_6CHAN_2K_HASH:
832 	case DF4p5_NPS4_3CHAN_2K_HASH:
833 	case DF4p5_NPS1_10CHAN_2K_HASH:
834 	case DF4p5_NPS2_5CHAN_2K_HASH:
835 		temp_addr_a = FIELD_GET(GENMASK_ULL(11, 9), addr) << 8;
836 		break;
837 
838 	default:
839 		atl_debug_on_bad_intlv_mode(ctx);
840 		return 0;
841 	}
842 
843 	switch (ctx->map.intlv_mode) {
844 	case DF4p5_NPS0_24CHAN_1K_HASH:
845 		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value;
846 		temp_addr_b <<= 10;
847 		break;
848 
849 	case DF4p5_NPS0_24CHAN_2K_HASH:
850 		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 14), addr) / denorm_ctx->mod_value;
851 		temp_addr_b <<= 11;
852 		break;
853 
854 	case DF4p5_NPS1_12CHAN_1K_HASH:
855 		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value;
856 		temp_addr_b <<= 10;
857 		break;
858 
859 	case DF4p5_NPS1_12CHAN_2K_HASH:
860 		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value;
861 		temp_addr_b <<= 11;
862 		break;
863 
864 	case DF4p5_NPS2_6CHAN_1K_HASH:
865 	case DF4p5_NPS1_10CHAN_1K_HASH:
866 		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
867 		temp_addr_b |= FIELD_GET(BIT_ULL(9), addr);
868 		temp_addr_b /= denorm_ctx->mod_value;
869 		temp_addr_b <<= 10;
870 		break;
871 
872 	case DF4p5_NPS2_6CHAN_2K_HASH:
873 	case DF4p5_NPS1_10CHAN_2K_HASH:
874 		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value;
875 		temp_addr_b <<= 11;
876 		break;
877 
878 	case DF4p5_NPS4_3CHAN_1K_HASH:
879 	case DF4p5_NPS2_5CHAN_1K_HASH:
880 		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 2;
881 		temp_addr_b |= FIELD_GET(GENMASK_ULL(9, 8), addr);
882 		temp_addr_b /= denorm_ctx->mod_value;
883 		temp_addr_b <<= 10;
884 		break;
885 
886 	case DF4p5_NPS4_3CHAN_2K_HASH:
887 	case DF4p5_NPS2_5CHAN_2K_HASH:
888 		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
889 		temp_addr_b |= FIELD_GET(BIT_ULL(8), addr);
890 		temp_addr_b /= denorm_ctx->mod_value;
891 		temp_addr_b <<= 11;
892 		break;
893 
894 	default:
895 		atl_debug_on_bad_intlv_mode(ctx);
896 		return 0;
897 	}
898 
899 	return denorm_ctx->base_denorm_addr | temp_addr_a | temp_addr_b;
900 }
901 
902 static void recalculate_hashed_bits_df4p5_np2(struct addr_ctx *ctx,
903 					      struct df4p5_denorm_ctx *denorm_ctx)
904 {
905 	bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T, hashed_bit;
906 
907 	if (!denorm_ctx->rehash_vector)
908 		return;
909 
910 	hash_ctl_64k	= FIELD_GET(DF4_HASH_CTL_64K,  ctx->map.ctl);
911 	hash_ctl_2M	= FIELD_GET(DF4_HASH_CTL_2M,   ctx->map.ctl);
912 	hash_ctl_1G	= FIELD_GET(DF4_HASH_CTL_1G,   ctx->map.ctl);
913 	hash_ctl_1T	= FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
914 
915 	if (denorm_ctx->rehash_vector & BIT_ULL(8)) {
916 		hashed_bit  = FIELD_GET(BIT_ULL(8),  denorm_ctx->current_spa);
917 		hashed_bit ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
918 		hashed_bit ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
919 		hashed_bit ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
920 		hashed_bit ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
921 		hashed_bit ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
922 
923 		if (FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) != hashed_bit)
924 			denorm_ctx->current_spa ^= BIT_ULL(8);
925 	}
926 
927 	if (denorm_ctx->rehash_vector & BIT_ULL(9)) {
928 		hashed_bit  = FIELD_GET(BIT_ULL(9),  denorm_ctx->current_spa);
929 		hashed_bit ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
930 		hashed_bit ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
931 		hashed_bit ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
932 		hashed_bit ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
933 
934 		if (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) != hashed_bit)
935 			denorm_ctx->current_spa ^= BIT_ULL(9);
936 	}
937 
938 	if (denorm_ctx->rehash_vector & BIT_ULL(12)) {
939 		hashed_bit  = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
940 		hashed_bit ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
941 		hashed_bit ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
942 		hashed_bit ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
943 		hashed_bit ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
944 
945 		if (FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa) != hashed_bit)
946 			denorm_ctx->current_spa ^= BIT_ULL(12);
947 	}
948 
949 	if (denorm_ctx->rehash_vector & BIT_ULL(13)) {
950 		hashed_bit  = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
951 		hashed_bit ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
952 		hashed_bit ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
953 		hashed_bit ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
954 		hashed_bit ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
955 
956 		if (FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa) != hashed_bit)
957 			denorm_ctx->current_spa ^= BIT_ULL(13);
958 	}
959 }
960 
961 static bool match_logical_coh_st_fabric_id(struct addr_ctx *ctx,
962 					   struct df4p5_denorm_ctx *denorm_ctx)
963 {
964 	/*
965 	 * The logical CS fabric ID of the permutation must be calculated from the
966 	 * current SPA with the base and with the MMIO hole.
967 	 */
968 	u16 id = get_logical_coh_st_fabric_id_for_current_spa(ctx, denorm_ctx);
969 
970 	atl_debug(ctx, "Checking calculated logical coherent station fabric id:\n");
971 	atl_debug(ctx, "  calculated fabric id         = 0x%x\n", id);
972 	atl_debug(ctx, "  expected fabric id           = 0x%x\n", denorm_ctx->coh_st_fabric_id);
973 
974 	return denorm_ctx->coh_st_fabric_id == id;
975 }
976 
977 static bool match_norm_addr(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
978 {
979 	u64 addr = remove_base_and_hole(ctx, denorm_ctx->current_spa);
980 
981 	/*
982 	 * The normalized address must be calculated with the current SPA without
983 	 * the base and without the MMIO hole.
984 	 */
985 	addr = normalize_addr_df4p5_np2(ctx, denorm_ctx, addr);
986 
987 	atl_debug(ctx, "Checking calculated normalized address:\n");
988 	atl_debug(ctx, "  calculated normalized addr = 0x%016llx\n", addr);
989 	atl_debug(ctx, "  expected normalized addr   = 0x%016llx\n", ctx->ret_addr);
990 
991 	return addr == ctx->ret_addr;
992 }
993 
994 static int check_permutations(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
995 {
996 	u64 test_perm, temp_addr, denorm_addr, num_perms;
997 	unsigned int dropped_remainder;
998 
999 	denorm_ctx->div_addr *= denorm_ctx->mod_value;
1000 
1001 	/*
1002 	 * The high order bits of num_permutations represent the permutations
1003 	 * of the dropped remainder. This will be either 0-3 or 0-5 depending
1004 	 * on the interleave mode. The low order bits represent the
1005 	 * permutations of other "lost" bits which will be any combination of
1006 	 * 1, 2, or 3 bits depending on the interleave mode.
1007 	 */
1008 	num_perms = denorm_ctx->mod_value << denorm_ctx->perm_shift;
1009 
1010 	for (test_perm = 0; test_perm < num_perms; test_perm++) {
1011 		denorm_addr = denorm_ctx->base_denorm_addr;
1012 		dropped_remainder = test_perm >> denorm_ctx->perm_shift;
1013 		temp_addr = denorm_ctx->div_addr + dropped_remainder;
1014 
1015 		switch (ctx->map.intlv_mode) {
1016 		case DF4p5_NPS0_24CHAN_2K_HASH:
1017 			denorm_addr |= temp_addr << 14;
1018 			break;
1019 
1020 		case DF4p5_NPS0_24CHAN_1K_HASH:
1021 		case DF4p5_NPS1_12CHAN_2K_HASH:
1022 			denorm_addr |= temp_addr << 13;
1023 			break;
1024 
1025 		case DF4p5_NPS1_12CHAN_1K_HASH:
1026 		case DF4p5_NPS2_6CHAN_2K_HASH:
1027 		case DF4p5_NPS1_10CHAN_2K_HASH:
1028 			denorm_addr |= temp_addr << 12;
1029 			break;
1030 
1031 		case DF4p5_NPS2_6CHAN_1K_HASH:
1032 		case DF4p5_NPS1_10CHAN_1K_HASH:
1033 			denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 9;
1034 			denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
1035 			break;
1036 
1037 		case DF4p5_NPS4_3CHAN_1K_HASH:
1038 		case DF4p5_NPS2_5CHAN_1K_HASH:
1039 			denorm_addr |= FIELD_GET(GENMASK_ULL(1, 0), temp_addr) << 8;
1040 			denorm_addr |= FIELD_GET(GENMASK_ULL(63, 2), (temp_addr)) << 12;
1041 			break;
1042 
1043 		case DF4p5_NPS4_3CHAN_2K_HASH:
1044 		case DF4p5_NPS2_5CHAN_2K_HASH:
1045 			denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 8;
1046 			denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
1047 			break;
1048 
1049 		default:
1050 			atl_debug_on_bad_intlv_mode(ctx);
1051 			return -EINVAL;
1052 		}
1053 
1054 		switch (ctx->map.intlv_mode) {
1055 		case DF4p5_NPS0_24CHAN_1K_HASH:
1056 			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1057 			denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
1058 			denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 12;
1059 			break;
1060 
1061 		case DF4p5_NPS0_24CHAN_2K_HASH:
1062 			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1063 			denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
1064 			denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 13;
1065 			break;
1066 
1067 		case DF4p5_NPS1_12CHAN_2K_HASH:
1068 			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1069 			denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
1070 			break;
1071 
1072 		case DF4p5_NPS1_12CHAN_1K_HASH:
1073 		case DF4p5_NPS4_3CHAN_1K_HASH:
1074 		case DF4p5_NPS2_5CHAN_1K_HASH:
1075 			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1076 			denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
1077 			break;
1078 
1079 		case DF4p5_NPS2_6CHAN_1K_HASH:
1080 		case DF4p5_NPS2_6CHAN_2K_HASH:
1081 		case DF4p5_NPS4_3CHAN_2K_HASH:
1082 		case DF4p5_NPS1_10CHAN_1K_HASH:
1083 		case DF4p5_NPS1_10CHAN_2K_HASH:
1084 		case DF4p5_NPS2_5CHAN_2K_HASH:
1085 			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1086 			break;
1087 
1088 		default:
1089 			atl_debug_on_bad_intlv_mode(ctx);
1090 			return -EINVAL;
1091 		}
1092 
1093 		denorm_ctx->current_spa = add_base_and_hole(ctx, denorm_addr);
1094 		recalculate_hashed_bits_df4p5_np2(ctx, denorm_ctx);
1095 
1096 		atl_debug(ctx, "Checking potential system physical address 0x%016llx\n",
1097 			  denorm_ctx->current_spa);
1098 
1099 		if (!match_logical_coh_st_fabric_id(ctx, denorm_ctx))
1100 			continue;
1101 
1102 		if (!match_norm_addr(ctx, denorm_ctx))
1103 			continue;
1104 
1105 		if (denorm_ctx->resolved_spa == INVALID_SPA ||
1106 		    denorm_ctx->current_spa > denorm_ctx->resolved_spa)
1107 			denorm_ctx->resolved_spa = denorm_ctx->current_spa;
1108 	}
1109 
1110 	if (denorm_ctx->resolved_spa == INVALID_SPA) {
1111 		atl_debug(ctx, "Failed to find valid SPA for normalized address 0x%016llx\n",
1112 			  ctx->ret_addr);
1113 		return -EINVAL;
1114 	}
1115 
1116 	/* Return the resolved SPA without the base, without the MMIO hole */
1117 	ctx->ret_addr = remove_base_and_hole(ctx, denorm_ctx->resolved_spa);
1118 
1119 	return 0;
1120 }
1121 
1122 static int init_df4p5_denorm_ctx(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
1123 {
1124 	denorm_ctx->current_spa = INVALID_SPA;
1125 	denorm_ctx->resolved_spa = INVALID_SPA;
1126 
1127 	switch (ctx->map.intlv_mode) {
1128 	case DF4p5_NPS0_24CHAN_1K_HASH:
1129 		denorm_ctx->perm_shift    = 3;
1130 		denorm_ctx->rehash_vector = BIT(8) | BIT(9) | BIT(12);
1131 		break;
1132 
1133 	case DF4p5_NPS0_24CHAN_2K_HASH:
1134 		denorm_ctx->perm_shift    = 3;
1135 		denorm_ctx->rehash_vector = BIT(8) | BIT(12) | BIT(13);
1136 		break;
1137 
1138 	case DF4p5_NPS1_12CHAN_1K_HASH:
1139 		denorm_ctx->perm_shift    = 2;
1140 		denorm_ctx->rehash_vector = BIT(8);
1141 		break;
1142 
1143 	case DF4p5_NPS1_12CHAN_2K_HASH:
1144 		denorm_ctx->perm_shift    = 2;
1145 		denorm_ctx->rehash_vector = BIT(8) | BIT(12);
1146 		break;
1147 
1148 	case DF4p5_NPS2_6CHAN_1K_HASH:
1149 	case DF4p5_NPS2_6CHAN_2K_HASH:
1150 	case DF4p5_NPS1_10CHAN_1K_HASH:
1151 	case DF4p5_NPS1_10CHAN_2K_HASH:
1152 		denorm_ctx->perm_shift    = 1;
1153 		denorm_ctx->rehash_vector = BIT(8);
1154 		break;
1155 
1156 	case DF4p5_NPS4_3CHAN_1K_HASH:
1157 	case DF4p5_NPS2_5CHAN_1K_HASH:
1158 		denorm_ctx->perm_shift    = 2;
1159 		denorm_ctx->rehash_vector = 0;
1160 		break;
1161 
1162 	case DF4p5_NPS4_3CHAN_2K_HASH:
1163 	case DF4p5_NPS2_5CHAN_2K_HASH:
1164 		denorm_ctx->perm_shift    = 1;
1165 		denorm_ctx->rehash_vector = 0;
1166 		break;
1167 
1168 	default:
1169 		atl_debug_on_bad_intlv_mode(ctx);
1170 		return -EINVAL;
1171 	}
1172 
1173 	denorm_ctx->base_denorm_addr = FIELD_GET(GENMASK_ULL(7, 0), ctx->ret_addr);
1174 
1175 	switch (ctx->map.intlv_mode) {
1176 	case DF4p5_NPS0_24CHAN_1K_HASH:
1177 	case DF4p5_NPS1_12CHAN_1K_HASH:
1178 	case DF4p5_NPS2_6CHAN_1K_HASH:
1179 	case DF4p5_NPS4_3CHAN_1K_HASH:
1180 	case DF4p5_NPS1_10CHAN_1K_HASH:
1181 	case DF4p5_NPS2_5CHAN_1K_HASH:
1182 		denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(9, 8), ctx->ret_addr) << 10;
1183 		denorm_ctx->div_addr          = FIELD_GET(GENMASK_ULL(63, 10), ctx->ret_addr);
1184 		break;
1185 
1186 	case DF4p5_NPS0_24CHAN_2K_HASH:
1187 	case DF4p5_NPS1_12CHAN_2K_HASH:
1188 	case DF4p5_NPS2_6CHAN_2K_HASH:
1189 	case DF4p5_NPS4_3CHAN_2K_HASH:
1190 	case DF4p5_NPS1_10CHAN_2K_HASH:
1191 	case DF4p5_NPS2_5CHAN_2K_HASH:
1192 		denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(10, 8), ctx->ret_addr) << 9;
1193 		denorm_ctx->div_addr          = FIELD_GET(GENMASK_ULL(63, 11), ctx->ret_addr);
1194 		break;
1195 
1196 	default:
1197 		atl_debug_on_bad_intlv_mode(ctx);
1198 		return -EINVAL;
1199 	}
1200 
1201 	if (ctx->map.num_intlv_chan % 3 == 0)
1202 		denorm_ctx->mod_value = 3;
1203 	else
1204 		denorm_ctx->mod_value = 5;
1205 
1206 	denorm_ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx) - get_dst_fabric_id(ctx);
1207 
1208 	atl_debug(ctx, "Initialized df4p5_denorm_ctx:");
1209 	atl_debug(ctx, "  mod_value         = %d", denorm_ctx->mod_value);
1210 	atl_debug(ctx, "  perm_shift        = %d", denorm_ctx->perm_shift);
1211 	atl_debug(ctx, "  rehash_vector     = 0x%x", denorm_ctx->rehash_vector);
1212 	atl_debug(ctx, "  base_denorm_addr  = 0x%016llx", denorm_ctx->base_denorm_addr);
1213 	atl_debug(ctx, "  div_addr          = 0x%016llx", denorm_ctx->div_addr);
1214 	atl_debug(ctx, "  coh_st_fabric_id  = 0x%x", denorm_ctx->coh_st_fabric_id);
1215 
1216 	return 0;
1217 }
1218 
1219 /*
1220  * For DF 4.5, parts of the physical address can be directly pulled from the
1221  * normalized address. The exact bits will differ between interleave modes, but
1222  * using NPS0_24CHAN_1K_HASH as an example, the normalized address consists of
1223  * bits [63:13] (divided by 3), bits [11:10], and bits [7:0] of the system
1224  * physical address.
1225  *
1226  * In this case, there is no way to reconstruct the missing bits (bits 8, 9,
1227  * and 12) from the normalized address. Additionally, when bits [63:13] are
1228  * divided by 3, the remainder is dropped. Determine the proper combination of
1229  * "lost" bits and dropped remainder by iterating through each possible
1230  * permutation of these bits and then normalizing the generated system physical
1231  * addresses. If the normalized address matches the address we are trying to
1232  * translate, then we have found the correct permutation of bits.
1233  */
1234 static int denorm_addr_df4p5_np2(struct addr_ctx *ctx)
1235 {
1236 	struct df4p5_denorm_ctx denorm_ctx;
1237 	int ret = 0;
1238 
1239 	memset(&denorm_ctx, 0, sizeof(denorm_ctx));
1240 
1241 	atl_debug(ctx, "Denormalizing DF 4.5 normalized address 0x%016llx", ctx->ret_addr);
1242 
1243 	ret = init_df4p5_denorm_ctx(ctx, &denorm_ctx);
1244 	if (ret)
1245 		return ret;
1246 
1247 	return check_permutations(ctx, &denorm_ctx);
1248 }
1249 
1250 int denormalize_address(struct addr_ctx *ctx)
1251 {
1252 	switch (ctx->map.intlv_mode) {
1253 	case NONE:
1254 		return 0;
1255 	case DF4_NPS4_3CHAN_HASH:
1256 	case DF4_NPS2_6CHAN_HASH:
1257 	case DF4_NPS1_12CHAN_HASH:
1258 	case DF4_NPS2_5CHAN_HASH:
1259 	case DF4_NPS1_10CHAN_HASH:
1260 		return denorm_addr_df4_np2(ctx);
1261 	case DF4p5_NPS0_24CHAN_1K_HASH:
1262 	case DF4p5_NPS4_3CHAN_1K_HASH:
1263 	case DF4p5_NPS2_6CHAN_1K_HASH:
1264 	case DF4p5_NPS1_12CHAN_1K_HASH:
1265 	case DF4p5_NPS2_5CHAN_1K_HASH:
1266 	case DF4p5_NPS1_10CHAN_1K_HASH:
1267 	case DF4p5_NPS4_3CHAN_2K_HASH:
1268 	case DF4p5_NPS2_6CHAN_2K_HASH:
1269 	case DF4p5_NPS1_12CHAN_2K_HASH:
1270 	case DF4p5_NPS0_24CHAN_2K_HASH:
1271 	case DF4p5_NPS2_5CHAN_2K_HASH:
1272 	case DF4p5_NPS1_10CHAN_2K_HASH:
1273 		return denorm_addr_df4p5_np2(ctx);
1274 	case DF3_6CHAN:
1275 		return denorm_addr_df3_6chan(ctx);
1276 	default:
1277 		return denorm_addr_common(ctx);
1278 	}
1279 }
1280