xref: /linux/drivers/ras/amd/atl/denormalize.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * AMD Address Translation Library
4  *
5  * denormalize.c : Functions to account for interleaving bits
6  *
7  * Copyright (c) 2023, Advanced Micro Devices, Inc.
8  * All Rights Reserved.
9  *
10  * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
11  */
12 
13 #include "internal.h"
14 
15 /*
16  * Returns the Destination Fabric ID. This is the first (lowest)
17  * COH_ST Fabric ID used within a DRAM Address map.
18  */
19 static u16 get_dst_fabric_id(struct addr_ctx *ctx)
20 {
21 	switch (df_cfg.rev) {
22 	case DF2:	return FIELD_GET(DF2_DST_FABRIC_ID,	ctx->map.limit);
23 	case DF3:	return FIELD_GET(DF3_DST_FABRIC_ID,	ctx->map.limit);
24 	case DF3p5:	return FIELD_GET(DF3p5_DST_FABRIC_ID,	ctx->map.limit);
25 	case DF4:	return FIELD_GET(DF4_DST_FABRIC_ID,	ctx->map.ctl);
26 	case DF4p5:	return FIELD_GET(DF4p5_DST_FABRIC_ID,	ctx->map.ctl);
27 	default:
28 			atl_debug_on_bad_df_rev();
29 			return 0;
30 	}
31 }
32 
33 /*
34  * Make a contiguous gap in address for N bits starting at bit P.
35  *
36  * Example:
37  * address bits:		[20:0]
38  * # of interleave bits    (n):	3
39  * starting interleave bit (p):	8
40  *
41  * expanded address bits:	[20+n : n+p][n+p-1 : p][p-1 : 0]
42  *				[23   :  11][10    : 8][7   : 0]
43  */
44 static u64 make_space_for_coh_st_id_at_intlv_bit(struct addr_ctx *ctx)
45 {
46 	return expand_bits(ctx->map.intlv_bit_pos,
47 			   ctx->map.total_intlv_bits,
48 			   ctx->ret_addr);
49 }
50 
51 /*
52  * Make two gaps in address for N bits.
53  * First gap is a single bit at bit P.
54  * Second gap is the remaining N-1 bits at bit 12.
55  *
56  * Example:
57  * address bits:		[20:0]
58  * # of interleave bits    (n):	3
59  * starting interleave bit (p):	8
60  *
61  * First gap
62  * expanded address bits:	[20+1 : p+1][p][p-1 : 0]
63  *				[21   :   9][8][7   : 0]
64  *
65  * Second gap uses result from first.
66  *				r = n - 1; remaining interleave bits
67  * expanded address bits:	[21+r : 12+r][12+r-1: 12][11 : 0]
68  *				[23   :   14][13    : 12][11 : 0]
69  */
70 static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx)
71 {
72 	/* Make a single space at the interleave bit. */
73 	u64 denorm_addr = expand_bits(ctx->map.intlv_bit_pos, 1, ctx->ret_addr);
74 
75 	/* Done if there's only a single interleave bit. */
76 	if (ctx->map.total_intlv_bits <= 1)
77 		return denorm_addr;
78 
79 	/* Make spaces for the remaining interleave bits starting at bit 12. */
80 	return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr);
81 }
82 
83 /*
84  * Make space for CS ID at bits [14:8] as follows:
85  *
86  * 8 channels	-> bits [10:8]
87  * 16 channels	-> bits [11:8]
88  * 32 channels	-> bits [14,11:8]
89  *
90  * 1 die	-> N/A
91  * 2 dies	-> bit  [12]
92  * 4 dies	-> bits [13:12]
93  */
94 static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx)
95 {
96 	u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
97 	u64 denorm_addr;
98 
99 	if (ctx->map.intlv_bit_pos != 8) {
100 		pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos);
101 		return ~0ULL;
102 	}
103 
104 	/* Channel bits. Covers up to 4 bits at [11:8]. */
105 	denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr);
106 
107 	/* Die bits. Always starts at [12]. */
108 	denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr);
109 
110 	/* Additional channel bit at [14]. */
111 	if (num_intlv_bits > 4)
112 		denorm_addr = expand_bits(14, 1, denorm_addr);
113 
114 	return denorm_addr;
115 }
116 
117 /*
118  * Take the current calculated address and shift enough bits in the middle
119  * to make a gap where the interleave bits will be inserted.
120  */
121 static u64 make_space_for_coh_st_id(struct addr_ctx *ctx)
122 {
123 	switch (ctx->map.intlv_mode) {
124 	case NOHASH_2CHAN:
125 	case NOHASH_4CHAN:
126 	case NOHASH_8CHAN:
127 	case NOHASH_16CHAN:
128 	case NOHASH_32CHAN:
129 	case DF2_2CHAN_HASH:
130 		return make_space_for_coh_st_id_at_intlv_bit(ctx);
131 
132 	case DF3_COD4_2CHAN_HASH:
133 	case DF3_COD2_4CHAN_HASH:
134 	case DF3_COD1_8CHAN_HASH:
135 	case DF4_NPS4_2CHAN_HASH:
136 	case DF4_NPS2_4CHAN_HASH:
137 	case DF4_NPS1_8CHAN_HASH:
138 	case DF4p5_NPS4_2CHAN_1K_HASH:
139 	case DF4p5_NPS4_2CHAN_2K_HASH:
140 	case DF4p5_NPS2_4CHAN_2K_HASH:
141 	case DF4p5_NPS1_8CHAN_2K_HASH:
142 	case DF4p5_NPS1_16CHAN_2K_HASH:
143 		return make_space_for_coh_st_id_split_2_1(ctx);
144 
145 	case MI3_HASH_8CHAN:
146 	case MI3_HASH_16CHAN:
147 	case MI3_HASH_32CHAN:
148 		return make_space_for_coh_st_id_mi300(ctx);
149 
150 	default:
151 		atl_debug_on_bad_intlv_mode(ctx);
152 		return ~0ULL;
153 	}
154 }
155 
156 static u16 get_coh_st_id_df2(struct addr_ctx *ctx)
157 {
158 	u8 num_socket_intlv_bits = ilog2(ctx->map.num_intlv_sockets);
159 	u8 num_die_intlv_bits = ilog2(ctx->map.num_intlv_dies);
160 	u8 num_intlv_bits;
161 	u16 coh_st_id, mask;
162 
163 	coh_st_id = ctx->coh_st_fabric_id - get_dst_fabric_id(ctx);
164 
165 	/* Channel interleave bits */
166 	num_intlv_bits = order_base_2(ctx->map.num_intlv_chan);
167 	mask = GENMASK(num_intlv_bits - 1, 0);
168 	coh_st_id &= mask;
169 
170 	/* Die interleave bits */
171 	if (num_die_intlv_bits) {
172 		u16 die_bits;
173 
174 		mask = GENMASK(num_die_intlv_bits - 1, 0);
175 		die_bits = ctx->coh_st_fabric_id & df_cfg.die_id_mask;
176 		die_bits >>= df_cfg.die_id_shift;
177 
178 		coh_st_id |= (die_bits & mask) << num_intlv_bits;
179 		num_intlv_bits += num_die_intlv_bits;
180 	}
181 
182 	/* Socket interleave bits */
183 	if (num_socket_intlv_bits) {
184 		u16 socket_bits;
185 
186 		mask = GENMASK(num_socket_intlv_bits - 1, 0);
187 		socket_bits = ctx->coh_st_fabric_id & df_cfg.socket_id_mask;
188 		socket_bits >>= df_cfg.socket_id_shift;
189 
190 		coh_st_id |= (socket_bits & mask) << num_intlv_bits;
191 	}
192 
193 	return coh_st_id;
194 }
195 
196 static u16 get_coh_st_id_df4(struct addr_ctx *ctx)
197 {
198 	/*
199 	 * Start with the original component mask and the number of interleave
200 	 * bits for the channels in this map.
201 	 */
202 	u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
203 	u16 mask = df_cfg.component_id_mask;
204 
205 	u16 socket_bits;
206 
207 	/* Set the derived Coherent Station ID to the input Coherent Station Fabric ID. */
208 	u16 coh_st_id = ctx->coh_st_fabric_id & mask;
209 
210 	/*
211 	 * Subtract the "base" Destination Fabric ID.
212 	 * This accounts for systems with disabled Coherent Stations.
213 	 */
214 	coh_st_id -= get_dst_fabric_id(ctx) & mask;
215 
216 	/*
217 	 * Generate and use a new mask based on the number of bits
218 	 * needed for channel interleaving in this map.
219 	 */
220 	mask = GENMASK(num_intlv_bits - 1, 0);
221 	coh_st_id &= mask;
222 
223 	/* Done if socket interleaving is not enabled. */
224 	if (ctx->map.num_intlv_sockets <= 1)
225 		return coh_st_id;
226 
227 	/*
228 	 * Figure out how many bits are needed for the number of
229 	 * interleaved sockets. And shift the derived Coherent Station ID to account
230 	 * for these.
231 	 */
232 	num_intlv_bits = ilog2(ctx->map.num_intlv_sockets);
233 	coh_st_id <<= num_intlv_bits;
234 
235 	/* Generate a new mask for the socket interleaving bits. */
236 	mask = GENMASK(num_intlv_bits - 1, 0);
237 
238 	/* Get the socket interleave bits from the original Coherent Station Fabric ID. */
239 	socket_bits = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask) >> df_cfg.socket_id_shift;
240 
241 	/* Apply the appropriate socket bits to the derived Coherent Station ID. */
242 	coh_st_id |= socket_bits & mask;
243 
244 	return coh_st_id;
245 }
246 
247 /*
248  * MI300 hash has:
249  * (C)hannel[3:0]	= coh_st_id[3:0]
250  * (S)tack[0]		= coh_st_id[4]
251  * (D)ie[1:0]		= coh_st_id[6:5]
252  *
253  * Hashed coh_st_id is swizzled so that Stack bit is at the end.
254  * coh_st_id = SDDCCCC
255  */
256 static u16 get_coh_st_id_mi300(struct addr_ctx *ctx)
257 {
258 	u8 channel_bits, die_bits, stack_bit;
259 	u16 die_id;
260 
261 	/* Subtract the "base" Destination Fabric ID. */
262 	ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx);
263 
264 	die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift;
265 
266 	channel_bits	= FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id);
267 	stack_bit	= FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6;
268 	die_bits	= die_id << 4;
269 
270 	return stack_bit | die_bits | channel_bits;
271 }
272 
273 /*
274  * Derive the correct Coherent Station ID that represents the interleave bits
275  * used within the system physical address. This accounts for the
276  * interleave mode, number of interleaved channels/dies/sockets, and
277  * other system/mode-specific bit swizzling.
278  *
279  * Returns:	Coherent Station ID on success.
280  *		All bits set on error.
281  */
282 static u16 calculate_coh_st_id(struct addr_ctx *ctx)
283 {
284 	switch (ctx->map.intlv_mode) {
285 	case NOHASH_2CHAN:
286 	case NOHASH_4CHAN:
287 	case NOHASH_8CHAN:
288 	case NOHASH_16CHAN:
289 	case NOHASH_32CHAN:
290 	case DF3_COD4_2CHAN_HASH:
291 	case DF3_COD2_4CHAN_HASH:
292 	case DF3_COD1_8CHAN_HASH:
293 	case DF2_2CHAN_HASH:
294 		return get_coh_st_id_df2(ctx);
295 
296 	case DF4_NPS4_2CHAN_HASH:
297 	case DF4_NPS2_4CHAN_HASH:
298 	case DF4_NPS1_8CHAN_HASH:
299 	case DF4p5_NPS4_2CHAN_1K_HASH:
300 	case DF4p5_NPS4_2CHAN_2K_HASH:
301 	case DF4p5_NPS2_4CHAN_2K_HASH:
302 	case DF4p5_NPS1_8CHAN_2K_HASH:
303 	case DF4p5_NPS1_16CHAN_2K_HASH:
304 		return get_coh_st_id_df4(ctx);
305 
306 	case MI3_HASH_8CHAN:
307 	case MI3_HASH_16CHAN:
308 	case MI3_HASH_32CHAN:
309 		return get_coh_st_id_mi300(ctx);
310 
311 	/* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */
312 	case DF4p5_NPS2_4CHAN_1K_HASH:
313 	case DF4p5_NPS1_8CHAN_1K_HASH:
314 	case DF4p5_NPS1_16CHAN_1K_HASH:
315 		return ctx->coh_st_fabric_id - get_dst_fabric_id(ctx);
316 
317 	default:
318 		atl_debug_on_bad_intlv_mode(ctx);
319 		return ~0;
320 	}
321 }
322 
323 static u64 insert_coh_st_id_at_intlv_bit(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
324 {
325 	return denorm_addr | (coh_st_id << ctx->map.intlv_bit_pos);
326 }
327 
328 static u64 insert_coh_st_id_split_2_1(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
329 {
330 	/* Insert coh_st_id[0] at the interleave bit. */
331 	denorm_addr |= (coh_st_id & BIT(0)) << ctx->map.intlv_bit_pos;
332 
333 	/* Insert coh_st_id[2:1] at bit 12. */
334 	denorm_addr |= (coh_st_id & GENMASK(2, 1)) << 11;
335 
336 	return denorm_addr;
337 }
338 
339 static u64 insert_coh_st_id_split_2_2(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
340 {
341 	/* Insert coh_st_id[1:0] at bit 8. */
342 	denorm_addr |= (coh_st_id & GENMASK(1, 0)) << 8;
343 
344 	/*
345 	 * Insert coh_st_id[n:2] at bit 12. 'n' could be 2 or 3.
346 	 * Grab both because bit 3 will be clear if unused.
347 	 */
348 	denorm_addr |= (coh_st_id & GENMASK(3, 2)) << 10;
349 
350 	return denorm_addr;
351 }
352 
353 static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
354 {
355 	switch (ctx->map.intlv_mode) {
356 	case NOHASH_2CHAN:
357 	case NOHASH_4CHAN:
358 	case NOHASH_8CHAN:
359 	case NOHASH_16CHAN:
360 	case NOHASH_32CHAN:
361 	case MI3_HASH_8CHAN:
362 	case MI3_HASH_16CHAN:
363 	case MI3_HASH_32CHAN:
364 	case DF2_2CHAN_HASH:
365 		return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id);
366 
367 	case DF3_COD4_2CHAN_HASH:
368 	case DF3_COD2_4CHAN_HASH:
369 	case DF3_COD1_8CHAN_HASH:
370 	case DF4_NPS4_2CHAN_HASH:
371 	case DF4_NPS2_4CHAN_HASH:
372 	case DF4_NPS1_8CHAN_HASH:
373 	case DF4p5_NPS4_2CHAN_1K_HASH:
374 	case DF4p5_NPS4_2CHAN_2K_HASH:
375 	case DF4p5_NPS2_4CHAN_2K_HASH:
376 	case DF4p5_NPS1_8CHAN_2K_HASH:
377 	case DF4p5_NPS1_16CHAN_2K_HASH:
378 		return insert_coh_st_id_split_2_1(ctx, denorm_addr, coh_st_id);
379 
380 	case DF4p5_NPS2_4CHAN_1K_HASH:
381 	case DF4p5_NPS1_8CHAN_1K_HASH:
382 	case DF4p5_NPS1_16CHAN_1K_HASH:
383 		return insert_coh_st_id_split_2_2(ctx, denorm_addr, coh_st_id);
384 
385 	default:
386 		atl_debug_on_bad_intlv_mode(ctx);
387 		return ~0ULL;
388 	}
389 }
390 
391 /*
392  * MI300 systems have a fixed, hardware-defined physical-to-logical
393  * Coherent Station mapping. The Remap registers are not used.
394  */
395 static const u16 phy_to_log_coh_st_map_mi300[] = {
396 	12, 13, 14, 15,
397 	 8,  9, 10, 11,
398 	 4,  5,  6,  7,
399 	 0,  1,  2,  3,
400 	28, 29, 30, 31,
401 	24, 25, 26, 27,
402 	20, 21, 22, 23,
403 	16, 17, 18, 19,
404 };
405 
406 static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx)
407 {
408 	if (ctx->inst_id >= ARRAY_SIZE(phy_to_log_coh_st_map_mi300)) {
409 		atl_debug(ctx, "Instance ID out of range");
410 		return ~0;
411 	}
412 
413 	return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift);
414 }
415 
416 static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
417 {
418 	u16 component_id, log_fabric_id;
419 
420 	/* Start with the physical COH_ST Fabric ID. */
421 	u16 phys_fabric_id = ctx->coh_st_fabric_id;
422 
423 	if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
424 		return get_logical_coh_st_fabric_id_mi300(ctx);
425 
426 	/* Skip logical ID lookup if remapping is disabled. */
427 	if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) &&
428 	    ctx->map.intlv_mode != DF3_6CHAN)
429 		return phys_fabric_id;
430 
431 	/* Mask off the Node ID bits to get the "local" Component ID. */
432 	component_id = phys_fabric_id & df_cfg.component_id_mask;
433 
434 	/*
435 	 * Search the list of logical Component IDs for the one that
436 	 * matches this physical Component ID.
437 	 */
438 	for (log_fabric_id = 0; log_fabric_id < MAX_COH_ST_CHANNELS; log_fabric_id++) {
439 		if (ctx->map.remap_array[log_fabric_id] == component_id)
440 			break;
441 	}
442 
443 	if (log_fabric_id == MAX_COH_ST_CHANNELS)
444 		atl_debug(ctx, "COH_ST remap entry not found for 0x%x",
445 			  log_fabric_id);
446 
447 	/* Get the Node ID bits from the physical and apply to the logical. */
448 	return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id;
449 }
450 
451 static int denorm_addr_common(struct addr_ctx *ctx)
452 {
453 	u64 denorm_addr;
454 	u16 coh_st_id;
455 
456 	/*
457 	 * Convert the original physical COH_ST Fabric ID to a logical value.
458 	 * This is required for non-power-of-two and other interleaving modes.
459 	 */
460 	ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx);
461 
462 	denorm_addr = make_space_for_coh_st_id(ctx);
463 	coh_st_id = calculate_coh_st_id(ctx);
464 	ctx->ret_addr = insert_coh_st_id(ctx, denorm_addr, coh_st_id);
465 	return 0;
466 }
467 
468 static int denorm_addr_df3_6chan(struct addr_ctx *ctx)
469 {
470 	u16 coh_st_id = ctx->coh_st_fabric_id & df_cfg.component_id_mask;
471 	u8 total_intlv_bits = ctx->map.total_intlv_bits;
472 	u8 low_bit, intlv_bit = ctx->map.intlv_bit_pos;
473 	u64 msb_intlv_bits, temp_addr_a, temp_addr_b;
474 	u8 np2_bits = ctx->map.np2_bits;
475 
476 	if (ctx->map.intlv_mode != DF3_6CHAN)
477 		return -EINVAL;
478 
479 	/*
480 	 * 'np2_bits' holds the number of bits needed to cover the
481 	 * amount of memory (rounded up) in this map using 64K chunks.
482 	 *
483 	 * Example:
484 	 * Total memory in map:			6GB
485 	 * Rounded up to next power-of-2:	8GB
486 	 * Number of 64K chunks:		0x20000
487 	 * np2_bits = log2(# of chunks):	17
488 	 *
489 	 * Get the two most-significant interleave bits from the
490 	 * input address based on the following:
491 	 *
492 	 * [15 + np2_bits - total_intlv_bits : 14 + np2_bits - total_intlv_bits]
493 	 */
494 	low_bit = 14 + np2_bits - total_intlv_bits;
495 	msb_intlv_bits = ctx->ret_addr >> low_bit;
496 	msb_intlv_bits &= 0x3;
497 
498 	/*
499 	 * If MSB are 11b, then logical COH_ST ID is 6 or 7.
500 	 * Need to adjust based on the mod3 result.
501 	 */
502 	if (msb_intlv_bits == 3) {
503 		u8 addr_mod, phys_addr_msb, msb_coh_st_id;
504 
505 		/* Get the remaining interleave bits from the input address. */
506 		temp_addr_b = GENMASK_ULL(low_bit - 1, intlv_bit) & ctx->ret_addr;
507 		temp_addr_b >>= intlv_bit;
508 
509 		/* Calculate the logical COH_ST offset based on mod3. */
510 		addr_mod = temp_addr_b % 3;
511 
512 		/* Get COH_ST ID bits [2:1]. */
513 		msb_coh_st_id = (coh_st_id >> 1) & 0x3;
514 
515 		/* Get the bit that starts the physical address bits. */
516 		phys_addr_msb = (intlv_bit + np2_bits + 1);
517 		phys_addr_msb &= BIT(0);
518 		phys_addr_msb++;
519 		phys_addr_msb *= 3 - addr_mod + msb_coh_st_id;
520 		phys_addr_msb %= 3;
521 
522 		/* Move the physical address MSB to the correct place. */
523 		temp_addr_b |= phys_addr_msb << (low_bit - total_intlv_bits - intlv_bit);
524 
525 		/* Generate a new COH_ST ID as follows: coh_st_id = [1, 1, coh_st_id[0]] */
526 		coh_st_id &= BIT(0);
527 		coh_st_id |= GENMASK(2, 1);
528 	} else {
529 		temp_addr_b = GENMASK_ULL(63, intlv_bit) & ctx->ret_addr;
530 		temp_addr_b >>= intlv_bit;
531 	}
532 
533 	temp_addr_a = GENMASK_ULL(intlv_bit - 1, 0) & ctx->ret_addr;
534 	temp_addr_b <<= intlv_bit + total_intlv_bits;
535 
536 	ctx->ret_addr = temp_addr_a | temp_addr_b;
537 	ctx->ret_addr |= coh_st_id << intlv_bit;
538 	return 0;
539 }
540 
541 static int denorm_addr_df4_np2(struct addr_ctx *ctx)
542 {
543 	bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
544 	u16 group, group_offset, log_coh_st_offset;
545 	unsigned int mod_value, shift_value;
546 	u16 mask = df_cfg.component_id_mask;
547 	u64 temp_addr_a, temp_addr_b;
548 	bool hash_pa8, hashed_bit;
549 
550 	switch (ctx->map.intlv_mode) {
551 	case DF4_NPS4_3CHAN_HASH:
552 		mod_value	= 3;
553 		shift_value	= 13;
554 		break;
555 	case DF4_NPS2_6CHAN_HASH:
556 		mod_value	= 3;
557 		shift_value	= 12;
558 		break;
559 	case DF4_NPS1_12CHAN_HASH:
560 		mod_value	= 3;
561 		shift_value	= 11;
562 		break;
563 	case DF4_NPS2_5CHAN_HASH:
564 		mod_value	= 5;
565 		shift_value	= 13;
566 		break;
567 	case DF4_NPS1_10CHAN_HASH:
568 		mod_value	= 5;
569 		shift_value	= 12;
570 		break;
571 	default:
572 		atl_debug_on_bad_intlv_mode(ctx);
573 		return -EINVAL;
574 	};
575 
576 	if (ctx->map.num_intlv_sockets == 1) {
577 		hash_pa8	= BIT_ULL(shift_value) & ctx->ret_addr;
578 		temp_addr_a	= remove_bits(shift_value, shift_value, ctx->ret_addr);
579 	} else {
580 		hash_pa8	= ctx->coh_st_fabric_id & df_cfg.socket_id_mask;
581 		temp_addr_a	= ctx->ret_addr;
582 	}
583 
584 	/* Make a gap for the real bit [8]. */
585 	temp_addr_a = expand_bits(8, 1, temp_addr_a);
586 
587 	/* Make an additional gap for bits [13:12], as appropriate.*/
588 	if (ctx->map.intlv_mode == DF4_NPS2_6CHAN_HASH ||
589 	    ctx->map.intlv_mode == DF4_NPS1_10CHAN_HASH) {
590 		temp_addr_a = expand_bits(13, 1, temp_addr_a);
591 	} else if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) {
592 		temp_addr_a = expand_bits(12, 2, temp_addr_a);
593 	}
594 
595 	/* Keep bits [13:0]. */
596 	temp_addr_a &= GENMASK_ULL(13, 0);
597 
598 	/* Get the appropriate high bits. */
599 	shift_value += 1 - ilog2(ctx->map.num_intlv_sockets);
600 	temp_addr_b = GENMASK_ULL(63, shift_value) & ctx->ret_addr;
601 	temp_addr_b >>= shift_value;
602 	temp_addr_b *= mod_value;
603 
604 	/*
605 	 * Coherent Stations are divided into groups.
606 	 *
607 	 * Multiples of 3 (mod3) are divided into quadrants.
608 	 * e.g. NP4_3CHAN ->	[0, 1, 2] [6, 7, 8]
609 	 *			[3, 4, 5] [9, 10, 11]
610 	 *
611 	 * Multiples of 5 (mod5) are divided into sides.
612 	 * e.g. NP2_5CHAN ->	[0, 1, 2, 3, 4] [5, 6, 7, 8, 9]
613 	 */
614 
615 	 /*
616 	  * Calculate the logical offset for the COH_ST within its DRAM Address map.
617 	  * e.g. if map includes [5, 6, 7, 8, 9] and target instance is '8', then
618 	  *	 log_coh_st_offset = 8 - 5 = 3
619 	  */
620 	log_coh_st_offset = (ctx->coh_st_fabric_id & mask) - (get_dst_fabric_id(ctx) & mask);
621 
622 	/*
623 	 * Figure out the group number.
624 	 *
625 	 * Following above example,
626 	 * log_coh_st_offset = 3
627 	 * mod_value = 5
628 	 * group = 3 / 5 = 0
629 	 */
630 	group = log_coh_st_offset / mod_value;
631 
632 	/*
633 	 * Figure out the offset within the group.
634 	 *
635 	 * Following above example,
636 	 * log_coh_st_offset = 3
637 	 * mod_value = 5
638 	 * group_offset = 3 % 5 = 3
639 	 */
640 	group_offset = log_coh_st_offset % mod_value;
641 
642 	/* Adjust group_offset if the hashed bit [8] is set. */
643 	if (hash_pa8) {
644 		if (!group_offset)
645 			group_offset = mod_value - 1;
646 		else
647 			group_offset--;
648 	}
649 
650 	/* Add in the group offset to the high bits. */
651 	temp_addr_b += group_offset;
652 
653 	/* Shift the high bits to the proper starting position. */
654 	temp_addr_b <<= 14;
655 
656 	/* Combine the high and low bits together. */
657 	ctx->ret_addr = temp_addr_a | temp_addr_b;
658 
659 	/* Account for hashing here instead of in dehash_address(). */
660 	hash_ctl_64k	= FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
661 	hash_ctl_2M	= FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
662 	hash_ctl_1G	= FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
663 
664 	hashed_bit = !!hash_pa8;
665 	hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr);
666 	hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k;
667 	hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M;
668 	hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G;
669 
670 	ctx->ret_addr |= hashed_bit << 8;
671 
672 	/* Done for 3 and 5 channel. */
673 	if (ctx->map.intlv_mode == DF4_NPS4_3CHAN_HASH ||
674 	    ctx->map.intlv_mode == DF4_NPS2_5CHAN_HASH)
675 		return 0;
676 
677 	/* Select the proper 'group' bit to use for Bit 13. */
678 	if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH)
679 		hashed_bit = !!(group & BIT(1));
680 	else
681 		hashed_bit = group & BIT(0);
682 
683 	hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k;
684 	hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M;
685 	hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G;
686 
687 	ctx->ret_addr |= hashed_bit << 13;
688 
689 	/* Done for 6 and 10 channel. */
690 	if (ctx->map.intlv_mode != DF4_NPS1_12CHAN_HASH)
691 		return 0;
692 
693 	hashed_bit = group & BIT(0);
694 	hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k;
695 	hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M;
696 	hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G;
697 
698 	ctx->ret_addr |= hashed_bit << 12;
699 	return 0;
700 }
701 
702 int denormalize_address(struct addr_ctx *ctx)
703 {
704 	switch (ctx->map.intlv_mode) {
705 	case NONE:
706 		return 0;
707 	case DF4_NPS4_3CHAN_HASH:
708 	case DF4_NPS2_6CHAN_HASH:
709 	case DF4_NPS1_12CHAN_HASH:
710 	case DF4_NPS2_5CHAN_HASH:
711 	case DF4_NPS1_10CHAN_HASH:
712 		return denorm_addr_df4_np2(ctx);
713 	case DF3_6CHAN:
714 		return denorm_addr_df3_6chan(ctx);
715 	default:
716 		return denorm_addr_common(ctx);
717 	}
718 }
719