umc.c (87a61237530769d5a7a750fbc747ac0d1b2e18c1) umc.c (3b566b30b41401888ee0e8eb904a1e7a6693794b)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * AMD Address Translation Library
4 *
5 * umc.c : Unified Memory Controller (UMC) topology helpers
6 *
7 * Copyright (c) 2023, Advanced Micro Devices, Inc.
8 * All Rights Reserved.

--- 225 unchanged lines hidden (view full) ---

234 addr |= FIELD_PREP(MI300_NA_SID, sid);
235
236 pr_debug("Addr=0x%016lx", addr);
237 pr_debug("Bank=%u Row=%u Column=%u PC=%u SID=%u", bank, row, col, pc, sid);
238
239 return addr;
240}
241
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * AMD Address Translation Library
4 *
5 * umc.c : Unified Memory Controller (UMC) topology helpers
6 *
7 * Copyright (c) 2023, Advanced Micro Devices, Inc.
8 * All Rights Reserved.

--- 225 unchanged lines hidden (view full) ---

234 addr |= FIELD_PREP(MI300_NA_SID, sid);
235
236 pr_debug("Addr=0x%016lx", addr);
237 pr_debug("Bank=%u Row=%u Column=%u PC=%u SID=%u", bank, row, col, pc, sid);
238
239 return addr;
240}
241
242/*
243 * When a DRAM ECC error occurs on MI300 systems, it is recommended to retire
244 * all memory within that DRAM row. This applies to the memory with a DRAM
245 * bank.
246 *
247 * To find the memory addresses, loop through permutations of the DRAM column
248 * bits and find the System Physical address of each. The column bits are used
249 * to calculate the intermediate Normalized address, so all permutations should
250 * be checked.
251 *
252 * See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats.
253 */
254#define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL))
255static void retire_row_mi300(struct atl_err *a_err)
256{
257 unsigned long addr;
258 struct page *p;
259 u8 col;
260
261 for (col = 0; col < MI300_NUM_COL; col++) {
262 a_err->addr &= ~MI300_UMC_MCA_COL;
263 a_err->addr |= FIELD_PREP(MI300_UMC_MCA_COL, col);
264
265 addr = amd_convert_umc_mca_addr_to_sys_addr(a_err);
266 if (IS_ERR_VALUE(addr))
267 continue;
268
269 addr = PHYS_PFN(addr);
270
271 /*
272 * Skip invalid or already poisoned pages to avoid unnecessary
273 * error messages from memory_failure().
274 */
275 p = pfn_to_online_page(addr);
276 if (!p)
277 continue;
278
279 if (PageHWPoison(p))
280 continue;
281
282 memory_failure(addr, 0);
283 }
284}
285
286void amd_retire_dram_row(struct atl_err *a_err)
287{
288 if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
289 return retire_row_mi300(a_err);
290}
291EXPORT_SYMBOL_GPL(amd_retire_dram_row);
292
242static unsigned long get_addr(unsigned long addr)
243{
244 if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
245 return convert_dram_to_norm_addr_mi300(addr);
246
247 return addr;
248}
249

--- 41 unchanged lines hidden ---
293static unsigned long get_addr(unsigned long addr)
294{
295 if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
296 return convert_dram_to_norm_addr_mi300(addr);
297
298 return addr;
299}
300

--- 41 unchanged lines hidden ---