--- mce.c (1cc2fd75934454be024cd7609b6d7890de6e724b)
+++ mce.c (923b3cf00b3ffc896543bac99affc0fa8553e41a)
@@ -1,45 +1,36 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Machine check exception handling.
  *
  * Copyright 2013 IBM Corporation
  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
  */
 
 #undef DEBUG
 #define pr_fmt(fmt) "mce: " fmt
 
 #include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
 #include <linux/irq_work.h>
 #include <linux/extable.h>
 #include <linux/ftrace.h>
+#include <linux/memblock.h>
 
 #include <asm/machdep.h>
 #include <asm/mce.h>
 #include <asm/nmi.h>
 #include <asm/asm-prototypes.h>
 
-static DEFINE_PER_CPU(int, mce_nest_count);
-static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
+#include "setup.h"
 
-/* Queue for delayed MCE events. */
-static DEFINE_PER_CPU(int, mce_queue_count);
-static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
-
-/* Queue for delayed MCE UE events. */
-static DEFINE_PER_CPU(int, mce_ue_count);
-static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
-                                        mce_ue_event_queue);
-
 static void machine_check_process_queued_event(struct irq_work *work);
 static void machine_check_ue_irq_work(struct irq_work *work);
 static void machine_check_ue_event(struct machine_check_event *evt);
 static void machine_process_ue_event(struct work_struct *work);
 
 static struct irq_work mce_event_process_work = {
         .func = machine_check_process_queued_event,
 };
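The per-CPU counters and event buffers removed above map one for one onto fields reached through local_paca->mce_info in the hunks below; mce_init() at the end of the diff allocates one such structure per possible CPU and stores the pointer in paca_ptrs[i]->mce_info. The structure itself is defined in a header that is not part of this diff, so the following layout is only a sketch inferred from the fields the new code dereferences.

/* Inferred layout, for illustration only (not taken from this diff):
 * one instance per CPU, pointed to by paca_ptrs[cpu]->mce_info and
 * reached at machine check time via local_paca. */
struct mce_info {
        int mce_nest_count;
        struct machine_check_event mce_event[MAX_MC_EVT];
        /* Queue for delayed MCE events. */
        int mce_queue_count;
        struct machine_check_event mce_event_queue[MAX_MC_EVT];
        /* Queue for delayed MCE UE events. */
        int mce_ue_count;
        struct machine_check_event mce_ue_event_queue[MAX_MC_EVT];
};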

@@ -99,19 +90,20 @@
 /*
  * Decode and save high level MCE information into per cpu buffer which
  * is an array of machine_check_event structure.
  */
 void save_mce_event(struct pt_regs *regs, long handled,
                     struct mce_error_info *mce_err,
                     uint64_t nip, uint64_t addr, uint64_t phys_addr)
 {
-        int index = __this_cpu_inc_return(mce_nest_count) - 1;
-        struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
+        int index = local_paca->mce_info->mce_nest_count++;
+        struct machine_check_event *mce;
 
+        mce = &local_paca->mce_info->mce_event[index];
         /*
          * Return if we don't have enough space to log mce event.
          * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
          * the check below will stop buffer overrun.
          */
         if (index >= MAX_MC_EVT)
                 return;
 
@@ -187,37 +179,37 @@
  * get_mce_event() will be called by platform specific machine check
  * handle routine and in KVM.
  * When we call get_mce_event(), we are still in interrupt context and
  * preemption will not be scheduled until ret_from_expect() routine
  * is called.
  */
 int get_mce_event(struct machine_check_event *mce, bool release)
 {
-        int index = __this_cpu_read(mce_nest_count) - 1;
+        int index = local_paca->mce_info->mce_nest_count - 1;
         struct machine_check_event *mc_evt;
         int ret = 0;
 
         /* Sanity check */
         if (index < 0)
                 return ret;
 
         /* Check if we have MCE info to process. */
         if (index < MAX_MC_EVT) {
-                mc_evt = this_cpu_ptr(&mce_event[index]);
+                mc_evt = &local_paca->mce_info->mce_event[index];
                 /* Copy the event structure and release the original */
                 if (mce)
                         *mce = *mc_evt;
                 if (release)
                         mc_evt->in_use = 0;
                 ret = 1;
         }
         /* Decrement the count to free the slot. */
         if (release)
-                __this_cpu_dec(mce_nest_count);
+                local_paca->mce_info->mce_nest_count--;
 
         return ret;
 }
 
 void release_mce_event(void)
 {
         get_mce_event(NULL, true);
 }
@@ -229,46 +221,48 @@
 
 /*
  * Queue up the MCE event which then can be handled later.
  */
 static void machine_check_ue_event(struct machine_check_event *evt)
 {
         int index;
 
-        index = __this_cpu_inc_return(mce_ue_count) - 1;
+        index = local_paca->mce_info->mce_ue_count++;
         /* If queue is full, just return for now. */
         if (index >= MAX_MC_EVT) {
-                __this_cpu_dec(mce_ue_count);
+                local_paca->mce_info->mce_ue_count--;
                 return;
         }
-        memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
+        memcpy(&local_paca->mce_info->mce_ue_event_queue[index],
+               evt, sizeof(*evt));
 
         /* Queue work to process this event later. */
         irq_work_queue(&mce_ue_event_irq_work);
 }
 
 /*
  * Queue up the MCE event which then can be handled later.
  */
 void machine_check_queue_event(void)
 {
         int index;
         struct machine_check_event evt;
 
         if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                 return;
 
-        index = __this_cpu_inc_return(mce_queue_count) - 1;
+        index = local_paca->mce_info->mce_queue_count++;
         /* If queue is full, just return for now. */
         if (index >= MAX_MC_EVT) {
-                __this_cpu_dec(mce_queue_count);
+                local_paca->mce_info->mce_queue_count--;
                 return;
         }
-        memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
+        memcpy(&local_paca->mce_info->mce_event_queue[index],
+               &evt, sizeof(evt));
 
         /* Queue irq work to process this event later. */
         irq_work_queue(&mce_event_process_work);
 }
 
 void mce_common_process_ue(struct pt_regs *regs,
                            struct mce_error_info *mce_err)
 {
@@ -285,76 +279,76 @@
  * process pending MCE event from the mce event queue. This function will be
  * called during syscall exit.
  */
 static void machine_process_ue_event(struct work_struct *work)
 {
         int index;
         struct machine_check_event *evt;
 
-        while (__this_cpu_read(mce_ue_count) > 0) {
-                index = __this_cpu_read(mce_ue_count) - 1;
-                evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+        while (local_paca->mce_info->mce_ue_count > 0) {
+                index = local_paca->mce_info->mce_ue_count - 1;
+                evt = &local_paca->mce_info->mce_ue_event_queue[index];
                 blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
 #ifdef CONFIG_MEMORY_FAILURE
                 /*
                  * This should probably queued elsewhere, but
                  * oh! well
                  *
                  * Don't report this machine check because the caller has a
                  * asked us to ignore the event, it has a fixup handler which
                  * will do the appropriate error handling and reporting.
                  */
                 if (evt->error_type == MCE_ERROR_TYPE_UE) {
                         if (evt->u.ue_error.ignore_event) {
-                                __this_cpu_dec(mce_ue_count);
+                                local_paca->mce_info->mce_ue_count--;
                                 continue;
                         }
 
                         if (evt->u.ue_error.physical_address_provided) {
                                 unsigned long pfn;
 
                                 pfn = evt->u.ue_error.physical_address >>
                                         PAGE_SHIFT;
                                 memory_failure(pfn, 0);
                         } else
                                 pr_warn("Failed to identify bad address from "
                                         "where the uncorrectable error (UE) "
                                         "was generated\n");
                 }
 #endif
-                __this_cpu_dec(mce_ue_count);
+                local_paca->mce_info->mce_ue_count--;
         }
 }
 /*
  * process pending MCE event from the mce event queue. This function will be
  * called during syscall exit.
  */
 static void machine_check_process_queued_event(struct irq_work *work)
 {
         int index;
         struct machine_check_event *evt;
 
         add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 
         /*
          * For now just print it to console.
          * TODO: log this error event to FSP or nvram.
          */
-        while (__this_cpu_read(mce_queue_count) > 0) {
-                index = __this_cpu_read(mce_queue_count) - 1;
-                evt = this_cpu_ptr(&mce_event_queue[index]);
+        while (local_paca->mce_info->mce_queue_count > 0) {
+                index = local_paca->mce_info->mce_queue_count - 1;
+                evt = &local_paca->mce_info->mce_event_queue[index];
 
                 if (evt->error_type == MCE_ERROR_TYPE_UE &&
                     evt->u.ue_error.ignore_event) {
-                        __this_cpu_dec(mce_queue_count);
+                        local_paca->mce_info->mce_queue_count--;
                         continue;
                 }
                 machine_check_print_event_info(evt, false, false);
-                __this_cpu_dec(mce_queue_count);
+                local_paca->mce_info->mce_queue_count--;
         }
 }
 
 void machine_check_print_event_info(struct machine_check_event *evt,
                                     bool user_mode, bool in_guest)
 {
         const char *level, *sevstr, *subtype, *err_type, *initiator;
         uint64_t ea = 0, pa = 0;
@@ -737,8 +731,29 @@
 
         if (ppc_md.hmi_exception_early)
                 ppc_md.hmi_exception_early(regs);
 
         wait_for_tb_resync();
 
         return 1;
 }
+
+void __init mce_init(void)
+{
+        struct mce_info *mce_info;
+        u64 limit;
+        int i;
+
+        limit = min(ppc64_bolted_size(), ppc64_rma_size);
+        for_each_possible_cpu(i) {
+                mce_info = memblock_alloc_try_nid(sizeof(*mce_info),
+                                                  __alignof__(*mce_info),
+                                                  MEMBLOCK_LOW_LIMIT,
+                                                  limit, cpu_to_node(i));
+                if (!mce_info)
+                        goto err;
+                paca_ptrs[i]->mce_info = mce_info;
+        }
+        return;
+err:
+        panic("Failed to allocate memory for MCE event data\n");
+}
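For orientation, a minimal hypothetical consumer of the interface visible in these hunks; the function name and the unconditional printing are illustrative only and are not taken from the kernel source.

/* Hypothetical helper, for illustration only: copy out the most recently
 * saved event, release its slot, and print it using the helpers shown in
 * the diff above. */
static void example_report_last_mce(void)
{
        struct machine_check_event evt;

        /* get_mce_event() copies the newest event recorded by save_mce_event();
         * passing MCE_EVENT_RELEASE also frees the slot, just as
         * release_mce_event() does via get_mce_event(NULL, true). */
        if (get_mce_event(&evt, MCE_EVENT_RELEASE))
                machine_check_print_event_info(&evt, false, false);
}

Since release_mce_event() is simply get_mce_event(NULL, true), a caller that only wants to drop the newest slot can use either form.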