xref: /linux/arch/powerpc/kernel/mce_power.c (revision 110e6f26af80dfd90b6e5c645b1aed7228aa580d)
1 /*
2  * Machine check exception handling CPU-side for power7 and power8
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright 2013 IBM Corporation
19  * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20  */
21 
22 #undef DEBUG
23 #define pr_fmt(fmt) "mce_power: " fmt
24 
25 #include <linux/types.h>
26 #include <linux/ptrace.h>
27 #include <asm/mmu.h>
28 #include <asm/mce.h>
29 #include <asm/machdep.h>
30 
31 static void flush_tlb_206(unsigned int num_sets, unsigned int action)
32 {
33 	unsigned long rb;
34 	unsigned int i;
35 
36 	switch (action) {
37 	case TLB_INVAL_SCOPE_GLOBAL:
38 		rb = TLBIEL_INVAL_SET;
39 		break;
40 	case TLB_INVAL_SCOPE_LPID:
41 		rb = TLBIEL_INVAL_SET_LPID;
42 		break;
43 	default:
44 		BUG();
45 		break;
46 	}
47 
48 	asm volatile("ptesync" : : : "memory");
49 	for (i = 0; i < num_sets; i++) {
50 		asm volatile("tlbiel %0" : : "r" (rb));
51 		rb += 1 << TLBIEL_INVAL_SET_SHIFT;
52 	}
53 	asm volatile("ptesync" : : : "memory");
54 }
55 
56 /*
57  * Generic routines to flush TLB on POWER processors. These routines
58  * are used as flush_tlb hook in the cpu_spec.
59  *
60  * action => TLB_INVAL_SCOPE_GLOBAL:  Invalidate all TLBs.
61  *	     TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
62  */
63 void __flush_tlb_power7(unsigned int action)
64 {
65 	flush_tlb_206(POWER7_TLB_SETS, action);
66 }
67 
68 void __flush_tlb_power8(unsigned int action)
69 {
70 	flush_tlb_206(POWER8_TLB_SETS, action);
71 }
72 
73 void __flush_tlb_power9(unsigned int action)
74 {
75 	flush_tlb_206(POWER9_TLB_SETS_HASH, action);
76 }
77 
78 
79 /* flush SLBs and reload */
80 static void flush_and_reload_slb(void)
81 {
82 	struct slb_shadow *slb;
83 	unsigned long i, n;
84 
85 	/* Invalidate all SLBs */
86 	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
87 
88 #ifdef CONFIG_KVM_BOOK3S_HANDLER
89 	/*
90 	 * If machine check is hit when in guest or in transition, we will
91 	 * only flush the SLBs and continue.
92 	 */
93 	if (get_paca()->kvm_hstate.in_guest)
94 		return;
95 #endif
96 
97 	/* For host kernel, reload the SLBs from shadow SLB buffer. */
98 	slb = get_slb_shadow();
99 	if (!slb)
100 		return;
101 
102 	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
103 
104 	/* Load up the SLB entries from shadow SLB */
105 	for (i = 0; i < n; i++) {
106 		unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
107 		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
108 
109 		rb = (rb & ~0xFFFul) | i;
110 		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
111 	}
112 }
113 
114 static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
115 {
116 	long handled = 1;
117 
118 	/*
119 	 * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
120 	 * reset the error bits whenever we handle them so that at the end
121 	 * we can check whether we handled all of them or not.
122 	 * */
123 	if (dsisr & slb_error_bits) {
124 		flush_and_reload_slb();
125 		/* reset error bits */
126 		dsisr &= ~(slb_error_bits);
127 	}
128 	if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
129 		if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
130 			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
131 		/* reset error bits */
132 		dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
133 	}
134 	/* Any other errors we don't understand? */
135 	if (dsisr & 0xffffffffUL)
136 		handled = 0;
137 
138 	return handled;
139 }
140 
141 static long mce_handle_derror_p7(uint64_t dsisr)
142 {
143 	return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
144 }
145 
146 static long mce_handle_common_ierror(uint64_t srr1)
147 {
148 	long handled = 0;
149 
150 	switch (P7_SRR1_MC_IFETCH(srr1)) {
151 	case 0:
152 		break;
153 	case P7_SRR1_MC_IFETCH_SLB_PARITY:
154 	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
155 		/* flush and reload SLBs for SLB errors. */
156 		flush_and_reload_slb();
157 		handled = 1;
158 		break;
159 	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
160 		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
161 			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
162 			handled = 1;
163 		}
164 		break;
165 	default:
166 		break;
167 	}
168 
169 	return handled;
170 }
171 
172 static long mce_handle_ierror_p7(uint64_t srr1)
173 {
174 	long handled = 0;
175 
176 	handled = mce_handle_common_ierror(srr1);
177 
178 	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
179 		flush_and_reload_slb();
180 		handled = 1;
181 	}
182 	return handled;
183 }
184 
185 static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
186 {
187 	switch (P7_SRR1_MC_IFETCH(srr1)) {
188 	case P7_SRR1_MC_IFETCH_SLB_PARITY:
189 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
190 		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
191 		break;
192 	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
193 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
194 		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
195 		break;
196 	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
197 		mce_err->error_type = MCE_ERROR_TYPE_TLB;
198 		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
199 		break;
200 	case P7_SRR1_MC_IFETCH_UE:
201 	case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
202 		mce_err->error_type = MCE_ERROR_TYPE_UE;
203 		mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
204 		break;
205 	case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
206 		mce_err->error_type = MCE_ERROR_TYPE_UE;
207 		mce_err->u.ue_error_type =
208 				MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
209 		break;
210 	}
211 }
212 
213 static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
214 {
215 	mce_get_common_ierror(mce_err, srr1);
216 	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
217 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
218 		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
219 	}
220 }
221 
222 static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
223 {
224 	if (dsisr & P7_DSISR_MC_UE) {
225 		mce_err->error_type = MCE_ERROR_TYPE_UE;
226 		mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
227 	} else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
228 		mce_err->error_type = MCE_ERROR_TYPE_UE;
229 		mce_err->u.ue_error_type =
230 				MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
231 	} else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
232 		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
233 		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
234 	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
235 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
236 		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
237 	} else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
238 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
239 		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
240 	} else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
241 		mce_err->error_type = MCE_ERROR_TYPE_TLB;
242 		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
243 	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
244 		mce_err->error_type = MCE_ERROR_TYPE_SLB;
245 		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
246 	}
247 }
248 
249 static long mce_handle_ue_error(struct pt_regs *regs)
250 {
251 	long handled = 0;
252 
253 	/*
254 	 * On specific SCOM read via MMIO we may get a machine check
255 	 * exception with SRR0 pointing inside opal. If that is the
256 	 * case OPAL may have recovery address to re-read SCOM data in
257 	 * different way and hence we can recover from this MC.
258 	 */
259 
260 	if (ppc_md.mce_check_early_recovery) {
261 		if (ppc_md.mce_check_early_recovery(regs))
262 			handled = 1;
263 	}
264 	return handled;
265 }
266 
267 long __machine_check_early_realmode_p7(struct pt_regs *regs)
268 {
269 	uint64_t srr1, nip, addr;
270 	long handled = 1;
271 	struct mce_error_info mce_error_info = { 0 };
272 
273 	srr1 = regs->msr;
274 	nip = regs->nip;
275 
276 	/*
277 	 * Handle memory errors depending whether this was a load/store or
278 	 * ifetch exception. Also, populate the mce error_type and
279 	 * type-specific error_type from either SRR1 or DSISR, depending
280 	 * whether this was a load/store or ifetch exception
281 	 */
282 	if (P7_SRR1_MC_LOADSTORE(srr1)) {
283 		handled = mce_handle_derror_p7(regs->dsisr);
284 		mce_get_derror_p7(&mce_error_info, regs->dsisr);
285 		addr = regs->dar;
286 	} else {
287 		handled = mce_handle_ierror_p7(srr1);
288 		mce_get_ierror_p7(&mce_error_info, srr1);
289 		addr = regs->nip;
290 	}
291 
292 	/* Handle UE error. */
293 	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
294 		handled = mce_handle_ue_error(regs);
295 
296 	save_mce_event(regs, handled, &mce_error_info, nip, addr);
297 	return handled;
298 }
299 
300 static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
301 {
302 	mce_get_common_ierror(mce_err, srr1);
303 	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
304 		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
305 		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
306 	}
307 }
308 
309 static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
310 {
311 	mce_get_derror_p7(mce_err, dsisr);
312 	if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
313 		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
314 		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
315 	}
316 }
317 
318 static long mce_handle_ierror_p8(uint64_t srr1)
319 {
320 	long handled = 0;
321 
322 	handled = mce_handle_common_ierror(srr1);
323 
324 	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
325 		flush_and_reload_slb();
326 		handled = 1;
327 	}
328 	return handled;
329 }
330 
331 static long mce_handle_derror_p8(uint64_t dsisr)
332 {
333 	return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
334 }
335 
336 long __machine_check_early_realmode_p8(struct pt_regs *regs)
337 {
338 	uint64_t srr1, nip, addr;
339 	long handled = 1;
340 	struct mce_error_info mce_error_info = { 0 };
341 
342 	srr1 = regs->msr;
343 	nip = regs->nip;
344 
345 	if (P7_SRR1_MC_LOADSTORE(srr1)) {
346 		handled = mce_handle_derror_p8(regs->dsisr);
347 		mce_get_derror_p8(&mce_error_info, regs->dsisr);
348 		addr = regs->dar;
349 	} else {
350 		handled = mce_handle_ierror_p8(srr1);
351 		mce_get_ierror_p8(&mce_error_info, srr1);
352 		addr = regs->nip;
353 	}
354 
355 	/* Handle UE error. */
356 	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
357 		handled = mce_handle_ue_error(regs);
358 
359 	save_mce_event(regs, handled, &mce_error_info, nip, addr);
360 	return handled;
361 }
362