1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * AMD Address Translation Library 4 * 5 * internal.h : Helper functions and common defines 6 * 7 * Copyright (c) 2023, Advanced Micro Devices, Inc. 8 * All Rights Reserved. 9 * 10 * Author: Yazen Ghannam <Yazen.Ghannam@amd.com> 11 */ 12 13 #ifndef __AMD_ATL_INTERNAL_H__ 14 #define __AMD_ATL_INTERNAL_H__ 15 16 #include <linux/bitfield.h> 17 #include <linux/bitops.h> 18 #include <linux/ras.h> 19 20 #include <asm/amd/nb.h> 21 #include <asm/amd/node.h> 22 23 #include "reg_fields.h" 24 25 #undef pr_fmt 26 #define pr_fmt(fmt) "amd_atl: " fmt 27 28 /* Maximum possible number of Coherent Stations within a single Data Fabric. */ 29 #define MAX_COH_ST_CHANNELS 32 30 31 /* PCI ID for Zen4 Server DF Function 0. */ 32 #define DF_FUNC0_ID_ZEN4_SERVER 0x14AD1022 33 34 /* PCI IDs for MI300 DF Function 0. */ 35 #define DF_FUNC0_ID_MI300 0x15281022 36 37 /* Shift needed for adjusting register values to true values. */ 38 #define DF_DRAM_BASE_LIMIT_LSB 28 39 #define MI300_DRAM_LIMIT_LSB 20 40 41 #define INVALID_SPA ~0ULL 42 43 enum df_revisions { 44 UNKNOWN, 45 DF2, 46 DF3, 47 DF3p5, 48 DF4, 49 DF4p5, 50 }; 51 52 /* These are mapped 1:1 to the hardware values. Special cases are set at > 0x20. */ 53 enum intlv_modes { 54 NONE = 0x00, 55 NOHASH_2CHAN = 0x01, 56 NOHASH_4CHAN = 0x03, 57 NOHASH_8CHAN = 0x05, 58 DF3_6CHAN = 0x06, 59 NOHASH_16CHAN = 0x07, 60 NOHASH_32CHAN = 0x08, 61 DF3_COD4_2CHAN_HASH = 0x0C, 62 DF3_COD2_4CHAN_HASH = 0x0D, 63 DF3_COD1_8CHAN_HASH = 0x0E, 64 DF4_NPS4_2CHAN_HASH = 0x10, 65 DF4_NPS2_4CHAN_HASH = 0x11, 66 DF4_NPS1_8CHAN_HASH = 0x12, 67 DF4_NPS4_3CHAN_HASH = 0x13, 68 DF4_NPS2_6CHAN_HASH = 0x14, 69 DF4_NPS1_12CHAN_HASH = 0x15, 70 DF4_NPS2_5CHAN_HASH = 0x16, 71 DF4_NPS1_10CHAN_HASH = 0x17, 72 MI3_HASH_8CHAN = 0x18, 73 MI3_HASH_16CHAN = 0x19, 74 MI3_HASH_32CHAN = 0x1A, 75 DF2_2CHAN_HASH = 0x21, 76 /* DF4.5 modes are all IntLvNumChan + 0x20 */ 77 DF4p5_NPS1_16CHAN_1K_HASH = 0x2C, 78 DF4p5_NPS0_24CHAN_1K_HASH = 0x2E, 79 DF4p5_NPS4_2CHAN_1K_HASH = 0x30, 80 DF4p5_NPS2_4CHAN_1K_HASH = 0x31, 81 DF4p5_NPS1_8CHAN_1K_HASH = 0x32, 82 DF4p5_NPS4_3CHAN_1K_HASH = 0x33, 83 DF4p5_NPS2_6CHAN_1K_HASH = 0x34, 84 DF4p5_NPS1_12CHAN_1K_HASH = 0x35, 85 DF4p5_NPS2_5CHAN_1K_HASH = 0x36, 86 DF4p5_NPS1_10CHAN_1K_HASH = 0x37, 87 DF4p5_NPS4_2CHAN_2K_HASH = 0x40, 88 DF4p5_NPS2_4CHAN_2K_HASH = 0x41, 89 DF4p5_NPS1_8CHAN_2K_HASH = 0x42, 90 DF4p5_NPS1_16CHAN_2K_HASH = 0x43, 91 DF4p5_NPS4_3CHAN_2K_HASH = 0x44, 92 DF4p5_NPS2_6CHAN_2K_HASH = 0x45, 93 DF4p5_NPS1_12CHAN_2K_HASH = 0x46, 94 DF4p5_NPS0_24CHAN_2K_HASH = 0x47, 95 DF4p5_NPS2_5CHAN_2K_HASH = 0x48, 96 DF4p5_NPS1_10CHAN_2K_HASH = 0x49, 97 }; 98 99 struct df4p5_denorm_ctx { 100 /* Indicates the number of "lost" bits. This will be 1, 2, or 3. */ 101 u8 perm_shift; 102 103 /* A mask indicating the bits that need to be rehashed. */ 104 u16 rehash_vector; 105 106 /* 107 * Represents the value that the high bits of the normalized address 108 * are divided by during normalization. This value will be 3 for 109 * interleave modes with a number of channels divisible by 3 or the 110 * value will be 5 for interleave modes with a number of channels 111 * divisible by 5. Power-of-two interleave modes are handled 112 * separately. 113 */ 114 u8 mod_value; 115 116 /* 117 * Represents the bits that can be directly pulled from the normalized 118 * address. In each case, pass through bits [7:0] of the normalized 119 * address. The other bits depend on the interleave bit position which 120 * will be bit 10 for 1K interleave stripe cases and bit 11 for 2K 121 * interleave stripe cases. 122 */ 123 u64 base_denorm_addr; 124 125 /* 126 * Represents the high bits of the physical address that have been 127 * divided by the mod_value. 128 */ 129 u64 div_addr; 130 131 u64 current_spa; 132 u64 resolved_spa; 133 134 u16 coh_st_fabric_id; 135 }; 136 137 struct df_flags { 138 __u8 legacy_ficaa : 1, 139 socket_id_shift_quirk : 1, 140 heterogeneous : 1, 141 prm_only : 1, 142 __reserved_0 : 4; 143 }; 144 145 struct df_config { 146 enum df_revisions rev; 147 148 /* 149 * These masks operate on the 16-bit Coherent Station IDs, 150 * e.g. Instance, Fabric, Destination, etc. 151 */ 152 u16 component_id_mask; 153 u16 die_id_mask; 154 u16 node_id_mask; 155 u16 socket_id_mask; 156 157 /* 158 * Least-significant bit of Node ID portion of the 159 * system-wide Coherent Station Fabric ID. 160 */ 161 u8 node_id_shift; 162 163 /* 164 * Least-significant bit of Die portion of the Node ID. 165 * Adjusted to include the Node ID shift in order to apply 166 * to the Coherent Station Fabric ID. 167 */ 168 u8 die_id_shift; 169 170 /* 171 * Least-significant bit of Socket portion of the Node ID. 172 * Adjusted to include the Node ID shift in order to apply 173 * to the Coherent Station Fabric ID. 174 */ 175 u8 socket_id_shift; 176 177 /* Number of DRAM Address maps visible in a Coherent Station. */ 178 u8 num_coh_st_maps; 179 180 u32 dram_hole_base; 181 182 /* Global flags to handle special cases. */ 183 struct df_flags flags; 184 }; 185 186 extern struct df_config df_cfg; 187 188 struct dram_addr_map { 189 /* 190 * Each DRAM Address Map can operate independently 191 * in different interleaving modes. 192 */ 193 enum intlv_modes intlv_mode; 194 195 /* System-wide number for this address map. */ 196 u8 num; 197 198 /* Raw register values */ 199 u32 base; 200 u32 limit; 201 u32 ctl; 202 u32 intlv; 203 204 /* 205 * Logical to Physical Coherent Station Remapping array 206 * 207 * Index: Logical Coherent Station Instance ID 208 * Value: Physical Coherent Station Instance ID 209 * 210 * phys_coh_st_inst_id = remap_array[log_coh_st_inst_id] 211 */ 212 u8 remap_array[MAX_COH_ST_CHANNELS]; 213 214 /* 215 * Number of bits covering DRAM Address map 0 216 * when interleaving is non-power-of-2. 217 * 218 * Used only for DF3_6CHAN. 219 */ 220 u8 np2_bits; 221 222 /* Position of the 'interleave bit'. */ 223 u8 intlv_bit_pos; 224 /* Number of channels interleaved in this map. */ 225 u8 num_intlv_chan; 226 /* Number of dies interleaved in this map. */ 227 u8 num_intlv_dies; 228 /* Number of sockets interleaved in this map. */ 229 u8 num_intlv_sockets; 230 /* 231 * Total number of channels interleaved accounting 232 * for die and socket interleaving. 233 */ 234 u8 total_intlv_chan; 235 /* Total bits needed to cover 'total_intlv_chan'. */ 236 u8 total_intlv_bits; 237 }; 238 239 /* Original input values cached for debug printing. */ 240 struct addr_ctx_inputs { 241 u64 norm_addr; 242 u8 socket_id; 243 u8 die_id; 244 u8 coh_st_inst_id; 245 }; 246 247 struct addr_ctx { 248 u64 ret_addr; 249 250 struct addr_ctx_inputs inputs; 251 struct dram_addr_map map; 252 253 /* AMD Node ID calculated from Socket and Die IDs. */ 254 u8 node_id; 255 256 /* 257 * Coherent Station Instance ID 258 * Local ID used within a 'node'. 259 */ 260 u16 inst_id; 261 262 /* 263 * Coherent Station Fabric ID 264 * System-wide ID that includes 'node' bits. 265 */ 266 u16 coh_st_fabric_id; 267 }; 268 269 int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo); 270 int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo); 271 272 int get_df_system_info(void); 273 int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num); 274 int get_umc_info_mi300(void); 275 276 int get_address_map(struct addr_ctx *ctx); 277 278 int denormalize_address(struct addr_ctx *ctx); 279 int dehash_address(struct addr_ctx *ctx); 280 281 unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr); 282 unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err); 283 284 u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr); 285 u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr); 286 287 /* GUIDs for PRM handlers */ 288 extern const guid_t norm_to_sys_guid; 289 290 #ifdef CONFIG_AMD_ATL_PRM 291 unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id, unsigned long addr); 292 #else 293 static inline unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id, 294 unsigned long addr) 295 { 296 return -ENODEV; 297 } 298 #endif 299 300 /* 301 * Make a gap in @data that is @num_bits long starting at @bit_num. 302 * e.g. data = 11111111'b 303 * bit_num = 3 304 * num_bits = 2 305 * result = 1111100111'b 306 */ 307 static inline u64 expand_bits(u8 bit_num, u8 num_bits, u64 data) 308 { 309 u64 temp1, temp2; 310 311 if (!num_bits) 312 return data; 313 314 if (!bit_num) { 315 WARN_ON_ONCE(num_bits >= BITS_PER_LONG); 316 return data << num_bits; 317 } 318 319 WARN_ON_ONCE(bit_num >= BITS_PER_LONG); 320 321 temp1 = data & GENMASK_ULL(bit_num - 1, 0); 322 323 temp2 = data & GENMASK_ULL(63, bit_num); 324 temp2 <<= num_bits; 325 326 return temp1 | temp2; 327 } 328 329 /* 330 * Remove bits in @data between @low_bit and @high_bit inclusive. 331 * e.g. data = XXXYYZZZ'b 332 * low_bit = 3 333 * high_bit = 4 334 * result = XXXZZZ'b 335 */ 336 static inline u64 remove_bits(u8 low_bit, u8 high_bit, u64 data) 337 { 338 u64 temp1, temp2; 339 340 WARN_ON_ONCE(high_bit >= BITS_PER_LONG); 341 WARN_ON_ONCE(low_bit >= BITS_PER_LONG); 342 WARN_ON_ONCE(low_bit > high_bit); 343 344 if (!low_bit) 345 return data >> (high_bit++); 346 347 temp1 = GENMASK_ULL(low_bit - 1, 0) & data; 348 temp2 = GENMASK_ULL(63, high_bit + 1) & data; 349 temp2 >>= high_bit - low_bit + 1; 350 351 return temp1 | temp2; 352 } 353 354 #define atl_debug(ctx, fmt, arg...) \ 355 pr_debug("socket_id=%u die_id=%u coh_st_inst_id=%u norm_addr=0x%016llx: " fmt,\ 356 (ctx)->inputs.socket_id, (ctx)->inputs.die_id,\ 357 (ctx)->inputs.coh_st_inst_id, (ctx)->inputs.norm_addr, ##arg) 358 359 static inline void atl_debug_on_bad_df_rev(void) 360 { 361 pr_debug("Unrecognized DF rev: %u", df_cfg.rev); 362 } 363 364 static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx) 365 { 366 atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode); 367 } 368 369 #define MI300_UMC_MCA_COL GENMASK(5, 1) 370 #define MI300_UMC_MCA_ROW13 BIT(23) 371 372 #endif /* __AMD_ATL_INTERNAL_H__ */ 373