mce.c (1cc2fd75934454be024cd7609b6d7890de6e724b) | mce.c (923b3cf00b3ffc896543bac99affc0fa8553e41a) |
---|---|
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Machine check exception handling. 4 * 5 * Copyright 2013 IBM Corporation 6 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 7 */ 8 9#undef DEBUG 10#define pr_fmt(fmt) "mce: " fmt 11 12#include <linux/hardirq.h> 13#include <linux/types.h> 14#include <linux/ptrace.h> 15#include <linux/percpu.h> 16#include <linux/export.h> 17#include <linux/irq_work.h> 18#include <linux/extable.h> 19#include <linux/ftrace.h> | 1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Machine check exception handling. 4 * 5 * Copyright 2013 IBM Corporation 6 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 7 */ 8 9#undef DEBUG 10#define pr_fmt(fmt) "mce: " fmt 11 12#include <linux/hardirq.h> 13#include <linux/types.h> 14#include <linux/ptrace.h> 15#include <linux/percpu.h> 16#include <linux/export.h> 17#include <linux/irq_work.h> 18#include <linux/extable.h> 19#include <linux/ftrace.h> |
 | 20#include <linux/memblock.h> |
20 21#include <asm/machdep.h> 22#include <asm/mce.h> 23#include <asm/nmi.h> 24#include <asm/asm-prototypes.h> 25 | 21 22#include <asm/machdep.h> 23#include <asm/mce.h> 24#include <asm/nmi.h> 25#include <asm/asm-prototypes.h> 26 |
26static DEFINE_PER_CPU(int, mce_nest_count); 27static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); | 27#include "setup.h" |
28 | 28 |
29/* Queue for delayed MCE events. */ 30static DEFINE_PER_CPU(int, mce_queue_count); 31static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); 32 33/* Queue for delayed MCE UE events. */ 34static DEFINE_PER_CPU(int, mce_ue_count); 35static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], 36 mce_ue_event_queue); 37 | |
38static void machine_check_process_queued_event(struct irq_work *work); 39static void machine_check_ue_irq_work(struct irq_work *work); 40static void machine_check_ue_event(struct machine_check_event *evt); 41static void machine_process_ue_event(struct work_struct *work); 42 43static struct irq_work mce_event_process_work = { 44 .func = machine_check_process_queued_event, 45}; --- 53 unchanged lines hidden (view full) --- 99/* 100 * Decode and save high level MCE information into per cpu buffer which 101 * is an array of machine_check_event structure. 102 */ 103void save_mce_event(struct pt_regs *regs, long handled, 104 struct mce_error_info *mce_err, 105 uint64_t nip, uint64_t addr, uint64_t phys_addr) 106{ | 29static void machine_check_process_queued_event(struct irq_work *work); 30static void machine_check_ue_irq_work(struct irq_work *work); 31static void machine_check_ue_event(struct machine_check_event *evt); 32static void machine_process_ue_event(struct work_struct *work); 33 34static struct irq_work mce_event_process_work = { 35 .func = machine_check_process_queued_event, 36}; --- 53 unchanged lines hidden (view full) --- 90/* 91 * Decode and save high level MCE information into per cpu buffer which 92 * is an array of machine_check_event structure. 93 */ 94void save_mce_event(struct pt_regs *regs, long handled, 95 struct mce_error_info *mce_err, 96 uint64_t nip, uint64_t addr, uint64_t phys_addr) 97{ |
107 int index = __this_cpu_inc_return(mce_nest_count) - 1; 108 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); | 98 int index = local_paca->mce_info->mce_nest_count++; 99 struct machine_check_event *mce; |
109 | 100 |
 | 101 mce = &local_paca->mce_info->mce_event[index]; |
110 /* 111 * Return if we don't have enough space to log mce event. 112 * mce_nest_count may go beyond MAX_MC_EVT but that's ok, 113 * the check below will stop buffer overrun. 114 */ 115 if (index >= MAX_MC_EVT) 116 return; 117 --- 69 unchanged lines hidden (view full) --- 187 * get_mce_event() will be called by platform specific machine check 188 * handle routine and in KVM. 189 * When we call get_mce_event(), we are still in interrupt context and 190 * preemption will not be scheduled until ret_from_expect() routine 191 * is called. 192 */ 193int get_mce_event(struct machine_check_event *mce, bool release) 194{ | 102 /* 103 * Return if we don't have enough space to log mce event. 104 * mce_nest_count may go beyond MAX_MC_EVT but that's ok, 105 * the check below will stop buffer overrun. 106 */ 107 if (index >= MAX_MC_EVT) 108 return; 109 --- 69 unchanged lines hidden (view full) --- 179 * get_mce_event() will be called by platform specific machine check 180 * handle routine and in KVM. 181 * When we call get_mce_event(), we are still in interrupt context and 182 * preemption will not be scheduled until ret_from_expect() routine 183 * is called. 184 */ 185int get_mce_event(struct machine_check_event *mce, bool release) 186{ |
195 int index = __this_cpu_read(mce_nest_count) - 1; | 187 int index = local_paca->mce_info->mce_nest_count - 1; |
196 struct machine_check_event *mc_evt; 197 int ret = 0; 198 199 /* Sanity check */ 200 if (index < 0) 201 return ret; 202 203 /* Check if we have MCE info to process. */ 204 if (index < MAX_MC_EVT) { | 188 struct machine_check_event *mc_evt; 189 int ret = 0; 190 191 /* Sanity check */ 192 if (index < 0) 193 return ret; 194 195 /* Check if we have MCE info to process. */ 196 if (index < MAX_MC_EVT) { |
205 mc_evt = this_cpu_ptr(&mce_event[index]); | 197 mc_evt = &local_paca->mce_info->mce_event[index]; |
206 /* Copy the event structure and release the original */ 207 if (mce) 208 *mce = *mc_evt; 209 if (release) 210 mc_evt->in_use = 0; 211 ret = 1; 212 } 213 /* Decrement the count to free the slot. */ 214 if (release) | 198 /* Copy the event structure and release the original */ 199 if (mce) 200 *mce = *mc_evt; 201 if (release) 202 mc_evt->in_use = 0; 203 ret = 1; 204 } 205 /* Decrement the count to free the slot. */ 206 if (release) |
215 __this_cpu_dec(mce_nest_count); | 207 local_paca->mce_info->mce_nest_count--; |
216 217 return ret; 218} 219 220void release_mce_event(void) 221{ 222 get_mce_event(NULL, true); 223} --- 5 unchanged lines hidden (view full) --- 229 230/* 231 * Queue up the MCE event which then can be handled later. 232 */ 233static void machine_check_ue_event(struct machine_check_event *evt) 234{ 235 int index; 236 | 208 209 return ret; 210} 211 212void release_mce_event(void) 213{ 214 get_mce_event(NULL, true); 215} --- 5 unchanged lines hidden (view full) --- 221 222/* 223 * Queue up the MCE event which then can be handled later. 224 */ 225static void machine_check_ue_event(struct machine_check_event *evt) 226{ 227 int index; 228 |
237 index = __this_cpu_inc_return(mce_ue_count) - 1; | 229 index = local_paca->mce_info->mce_ue_count++; |
238 /* If queue is full, just return for now. */ 239 if (index >= MAX_MC_EVT) { | 230 /* If queue is full, just return for now. */ 231 if (index >= MAX_MC_EVT) { |
240 __this_cpu_dec(mce_ue_count); | 232 local_paca->mce_info->mce_ue_count--; |
241 return; 242 } | 233 return; 234 } |
243 memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt)); | 235 memcpy(&local_paca->mce_info->mce_ue_event_queue[index], 236 evt, sizeof(*evt)); |
244 245 /* Queue work to process this event later. */ 246 irq_work_queue(&mce_ue_event_irq_work); 247} 248 249/* 250 * Queue up the MCE event which then can be handled later. 251 */ 252void machine_check_queue_event(void) 253{ 254 int index; 255 struct machine_check_event evt; 256 257 if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 258 return; 259 | 237 238 /* Queue work to process this event later. */ 239 irq_work_queue(&mce_ue_event_irq_work); 240} 241 242/* 243 * Queue up the MCE event which then can be handled later. 244 */ 245void machine_check_queue_event(void) 246{ 247 int index; 248 struct machine_check_event evt; 249 250 if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 251 return; 252 |
260 index = __this_cpu_inc_return(mce_queue_count) - 1; | 253 index = local_paca->mce_info->mce_queue_count++; |
261 /* If queue is full, just return for now. */ 262 if (index >= MAX_MC_EVT) { | 254 /* If queue is full, just return for now. */ 255 if (index >= MAX_MC_EVT) { |
263 __this_cpu_dec(mce_queue_count); | 256 local_paca->mce_info->mce_queue_count--; |
264 return; 265 } | 257 return; 258 } |
266 memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); | 259 memcpy(&local_paca->mce_info->mce_event_queue[index], 260 &evt, sizeof(evt)); |
267 268 /* Queue irq work to process this event later. */ 269 irq_work_queue(&mce_event_process_work); 270} 271 272void mce_common_process_ue(struct pt_regs *regs, 273 struct mce_error_info *mce_err) 274{ --- 10 unchanged lines hidden (view full) --- 285 * process pending MCE event from the mce event queue. This function will be 286 * called during syscall exit. 287 */ 288static void machine_process_ue_event(struct work_struct *work) 289{ 290 int index; 291 struct machine_check_event *evt; 292 | 261 262 /* Queue irq work to process this event later. */ 263 irq_work_queue(&mce_event_process_work); 264} 265 266void mce_common_process_ue(struct pt_regs *regs, 267 struct mce_error_info *mce_err) 268{ --- 10 unchanged lines hidden (view full) --- 279 * process pending MCE event from the mce event queue. This function will be 280 * called during syscall exit. 281 */ 282static void machine_process_ue_event(struct work_struct *work) 283{ 284 int index; 285 struct machine_check_event *evt; 286 |
293 while (__this_cpu_read(mce_ue_count) > 0) { 294 index = __this_cpu_read(mce_ue_count) - 1; 295 evt = this_cpu_ptr(&mce_ue_event_queue[index]); | 287 while (local_paca->mce_info->mce_ue_count > 0) { 288 index = local_paca->mce_info->mce_ue_count - 1; 289 evt = &local_paca->mce_info->mce_ue_event_queue[index]; |
296 blocking_notifier_call_chain(&mce_notifier_list, 0, evt); 297#ifdef CONFIG_MEMORY_FAILURE 298 /* 299 * This should probably queued elsewhere, but 300 * oh! well 301 * 302 * Don't report this machine check because the caller has a 303 * asked us to ignore the event, it has a fixup handler which 304 * will do the appropriate error handling and reporting. 305 */ 306 if (evt->error_type == MCE_ERROR_TYPE_UE) { 307 if (evt->u.ue_error.ignore_event) { | 290 blocking_notifier_call_chain(&mce_notifier_list, 0, evt); 291#ifdef CONFIG_MEMORY_FAILURE 292 /* 293 * This should probably queued elsewhere, but 294 * oh! well 295 * 296 * Don't report this machine check because the caller has a 297 * asked us to ignore the event, it has a fixup handler which 298 * will do the appropriate error handling and reporting. 299 */ 300 if (evt->error_type == MCE_ERROR_TYPE_UE) { 301 if (evt->u.ue_error.ignore_event) { |
308 __this_cpu_dec(mce_ue_count); | 302 local_paca->mce_info->mce_ue_count--; |
309 continue; 310 } 311 312 if (evt->u.ue_error.physical_address_provided) { 313 unsigned long pfn; 314 315 pfn = evt->u.ue_error.physical_address >> 316 PAGE_SHIFT; 317 memory_failure(pfn, 0); 318 } else 319 pr_warn("Failed to identify bad address from " 320 "where the uncorrectable error (UE) " 321 "was generated\n"); 322 } 323#endif | 303 continue; 304 } 305 306 if (evt->u.ue_error.physical_address_provided) { 307 unsigned long pfn; 308 309 pfn = evt->u.ue_error.physical_address >> 310 PAGE_SHIFT; 311 memory_failure(pfn, 0); 312 } else 313 pr_warn("Failed to identify bad address from " 314 "where the uncorrectable error (UE) " 315 "was generated\n"); 316 } 317#endif |
324 __this_cpu_dec(mce_ue_count); | 318 local_paca->mce_info->mce_ue_count--; |
325 } 326} 327/* 328 * process pending MCE event from the mce event queue. This function will be 329 * called during syscall exit. 330 */ 331static void machine_check_process_queued_event(struct irq_work *work) 332{ 333 int index; 334 struct machine_check_event *evt; 335 336 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); 337 338 /* 339 * For now just print it to console. 340 * TODO: log this error event to FSP or nvram. 341 */ | 319 } 320} 321/* 322 * process pending MCE event from the mce event queue. This function will be 323 * called during syscall exit. 324 */ 325static void machine_check_process_queued_event(struct irq_work *work) 326{ 327 int index; 328 struct machine_check_event *evt; 329 330 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); 331 332 /* 333 * For now just print it to console. 334 * TODO: log this error event to FSP or nvram. 335 */ |
342 while (__this_cpu_read(mce_queue_count) > 0) { 343 index = __this_cpu_read(mce_queue_count) - 1; 344 evt = this_cpu_ptr(&mce_event_queue[index]); | 336 while (local_paca->mce_info->mce_queue_count > 0) { 337 index = local_paca->mce_info->mce_queue_count - 1; 338 evt = &local_paca->mce_info->mce_event_queue[index]; |
345 346 if (evt->error_type == MCE_ERROR_TYPE_UE && 347 evt->u.ue_error.ignore_event) { | 339 340 if (evt->error_type == MCE_ERROR_TYPE_UE && 341 evt->u.ue_error.ignore_event) { |
348 __this_cpu_dec(mce_queue_count); | 342 local_paca->mce_info->mce_queue_count--; |
349 continue; 350 } 351 machine_check_print_event_info(evt, false, false); | 343 continue; 344 } 345 machine_check_print_event_info(evt, false, false); |
352 __this_cpu_dec(mce_queue_count); | 346 local_paca->mce_info->mce_queue_count--; |
353 } 354} 355 356void machine_check_print_event_info(struct machine_check_event *evt, 357 bool user_mode, bool in_guest) 358{ 359 const char *level, *sevstr, *subtype, *err_type, *initiator; 360 uint64_t ea = 0, pa = 0; --- 376 unchanged lines hidden (view full) --- 737 738 if (ppc_md.hmi_exception_early) 739 ppc_md.hmi_exception_early(regs); 740 741 wait_for_tb_resync(); 742 743 return 1; 744} | 347 } 348} 349 350void machine_check_print_event_info(struct machine_check_event *evt, 351 bool user_mode, bool in_guest) 352{ 353 const char *level, *sevstr, *subtype, *err_type, *initiator; 354 uint64_t ea = 0, pa = 0; --- 376 unchanged lines hidden (view full) --- 731 732 if (ppc_md.hmi_exception_early) 733 ppc_md.hmi_exception_early(regs); 734 735 wait_for_tb_resync(); 736 737 return 1; 738} |
 | 739 740void __init mce_init(void) 741{ 742 struct mce_info *mce_info; 743 u64 limit; 744 int i; 745 746 limit = min(ppc64_bolted_size(), ppc64_rma_size); 747 for_each_possible_cpu(i) { 748 mce_info = memblock_alloc_try_nid(sizeof(*mce_info), 749 __alignof__(*mce_info), 750 MEMBLOCK_LOW_LIMIT, 751 limit, cpu_to_node(i)); 752 if (!mce_info) 753 goto err; 754 paca_ptrs[i]->mce_info = mce_info; 755 } 756 return; 757err: 758 panic("Failed to allocate memory for MCE event data\n"); 759} |
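
The right-hand revision drops the `DEFINE_PER_CPU` counters and event arrays and instead reaches the same bookkeeping through `local_paca->mce_info`, which the new `mce_init()` allocates once per possible CPU. Below is a minimal sketch of the `struct mce_info` layout implied by the fields referenced in the new column; the authoritative definition lives in the powerpc MCE header (presumably alongside `struct machine_check_event`), and only the field names are taken from the diff, so the exact declaration and ordering here are assumptions.

```c
/*
 * Sketch of the MCE bookkeeping that moves off the per-CPU area and
 * behind the paca. Field names match the accesses in the new code
 * (local_paca->mce_info->...); MAX_MC_EVT and struct machine_check_event
 * are the existing constant/type from asm/mce.h. Layout is assumed.
 */
struct mce_info {
	int mce_nest_count;                 /* events currently being handled */
	struct machine_check_event mce_event[MAX_MC_EVT];
	int mce_queue_count;                /* events queued for later logging */
	struct machine_check_event mce_event_queue[MAX_MC_EVT];
	int mce_ue_count;                   /* delayed uncorrectable-error events */
	struct machine_check_event mce_ue_event_queue[MAX_MC_EVT];
};
```

`mce_init()` allocates one such block per possible CPU from memblock, capped at `min(ppc64_bolted_size(), ppc64_rma_size)`, and stores the pointer in `paca_ptrs[i]->mce_info`. Keeping this data behind the paca (always reachable via r13) rather than in the per-CPU area matters because a machine check can be taken with the MMU off, where per-CPU accesses such as `__this_cpu_inc_return()` may not be safe; bounding the allocation below the bolted/RMA limit presumably keeps the buffers addressable in that environment as well.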