xref: /linux/arch/arm64/kernel/traps.c (revision e3966940559d52aa1800a008dcfeec218dd31f88)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Based on arch/arm/kernel/traps.c
4  *
5  * Copyright (C) 1995-2009 Russell King
6  * Copyright (C) 2012 ARM Ltd.
7  */
8 
9 #include <linux/bug.h>
10 #include <linux/context_tracking.h>
11 #include <linux/signal.h>
12 #include <linux/kallsyms.h>
13 #include <linux/kprobes.h>
14 #include <linux/spinlock.h>
15 #include <linux/uaccess.h>
16 #include <linux/hardirq.h>
17 #include <linux/kdebug.h>
18 #include <linux/module.h>
19 #include <linux/kexec.h>
20 #include <linux/delay.h>
21 #include <linux/efi.h>
22 #include <linux/init.h>
23 #include <linux/sched/signal.h>
24 #include <linux/sched/debug.h>
25 #include <linux/sched/task_stack.h>
26 #include <linux/sizes.h>
27 #include <linux/syscalls.h>
28 #include <linux/mm_types.h>
29 #include <linux/kasan.h>
30 #include <linux/ubsan.h>
31 #include <linux/cfi.h>
32 
33 #include <asm/atomic.h>
34 #include <asm/bug.h>
35 #include <asm/cpufeature.h>
36 #include <asm/daifflags.h>
37 #include <asm/debug-monitors.h>
38 #include <asm/efi.h>
39 #include <asm/esr.h>
40 #include <asm/exception.h>
41 #include <asm/extable.h>
42 #include <asm/insn.h>
43 #include <asm/kprobes.h>
44 #include <asm/text-patching.h>
45 #include <asm/traps.h>
46 #include <asm/smp.h>
47 #include <asm/stack_pointer.h>
48 #include <asm/stacktrace.h>
49 #include <asm/system_misc.h>
50 #include <asm/sysreg.h>
51 
52 static bool __kprobes __check_eq(unsigned long pstate)
53 {
54 	return (pstate & PSR_Z_BIT) != 0;
55 }
56 
57 static bool __kprobes __check_ne(unsigned long pstate)
58 {
59 	return (pstate & PSR_Z_BIT) == 0;
60 }
61 
62 static bool __kprobes __check_cs(unsigned long pstate)
63 {
64 	return (pstate & PSR_C_BIT) != 0;
65 }
66 
67 static bool __kprobes __check_cc(unsigned long pstate)
68 {
69 	return (pstate & PSR_C_BIT) == 0;
70 }
71 
72 static bool __kprobes __check_mi(unsigned long pstate)
73 {
74 	return (pstate & PSR_N_BIT) != 0;
75 }
76 
77 static bool __kprobes __check_pl(unsigned long pstate)
78 {
79 	return (pstate & PSR_N_BIT) == 0;
80 }
81 
82 static bool __kprobes __check_vs(unsigned long pstate)
83 {
84 	return (pstate & PSR_V_BIT) != 0;
85 }
86 
87 static bool __kprobes __check_vc(unsigned long pstate)
88 {
89 	return (pstate & PSR_V_BIT) == 0;
90 }
91 
92 static bool __kprobes __check_hi(unsigned long pstate)
93 {
94 	pstate &= ~(pstate >> 1);	/* PSR_C_BIT &= ~PSR_Z_BIT */
95 	return (pstate & PSR_C_BIT) != 0;
96 }
97 
98 static bool __kprobes __check_ls(unsigned long pstate)
99 {
100 	pstate &= ~(pstate >> 1);	/* PSR_C_BIT &= ~PSR_Z_BIT */
101 	return (pstate & PSR_C_BIT) == 0;
102 }
103 
104 static bool __kprobes __check_ge(unsigned long pstate)
105 {
106 	pstate ^= (pstate << 3);	/* PSR_N_BIT ^= PSR_V_BIT */
107 	return (pstate & PSR_N_BIT) == 0;
108 }
109 
110 static bool __kprobes __check_lt(unsigned long pstate)
111 {
112 	pstate ^= (pstate << 3);	/* PSR_N_BIT ^= PSR_V_BIT */
113 	return (pstate & PSR_N_BIT) != 0;
114 }
115 
116 static bool __kprobes __check_gt(unsigned long pstate)
117 {
118 	/*PSR_N_BIT ^= PSR_V_BIT */
119 	unsigned long temp = pstate ^ (pstate << 3);
120 
121 	temp |= (pstate << 1);	/*PSR_N_BIT |= PSR_Z_BIT */
122 	return (temp & PSR_N_BIT) == 0;
123 }
124 
125 static bool __kprobes __check_le(unsigned long pstate)
126 {
127 	/*PSR_N_BIT ^= PSR_V_BIT */
128 	unsigned long temp = pstate ^ (pstate << 3);
129 
130 	temp |= (pstate << 1);	/*PSR_N_BIT |= PSR_Z_BIT */
131 	return (temp & PSR_N_BIT) != 0;
132 }
133 
/* AL: always true; @pstate is not inspected. */
static bool __kprobes __check_al(unsigned long pstate)
{
	return true;
}
138 
139 /*
140  * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that
141  * it behaves identically to 0b1110 ("al").
142  */
143 pstate_check_t * const aarch32_opcode_cond_checks[16] = {
144 	__check_eq, __check_ne, __check_cs, __check_cc,
145 	__check_mi, __check_pl, __check_vs, __check_vc,
146 	__check_hi, __check_ls, __check_ge, __check_lt,
147 	__check_gt, __check_le, __check_al, __check_al
148 };
149 
/*
 * When zero, arm64_show_signal() stays silent about unhandled signals.
 * Zero-initialised like any global without an explicit initialiser.
 */
int show_unhandled_signals;
151 
152 void dump_kernel_instr(unsigned long kaddr)
153 {
154 	char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
155 	int i;
156 
157 	if (!is_ttbr1_addr(kaddr))
158 		return;
159 
160 	for (i = -4; i < 1; i++) {
161 		unsigned int val, bad;
162 
163 		bad = aarch64_insn_read(&((u32 *)kaddr)[i], &val);
164 
165 		if (!bad)
166 			p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
167 		else
168 			p += sprintf(p, i == 0 ? "(????????) " : "???????? ");
169 	}
170 
171 	printk(KERN_EMERG "Code: %s\n", str);
172 }
173 
174 #define S_SMP " SMP"
175 
176 static int __die(const char *str, long err, struct pt_regs *regs)
177 {
178 	static int die_counter;
179 	int ret;
180 	unsigned long addr = instruction_pointer(regs);
181 
182 	pr_emerg("Internal error: %s: %016lx [#%d] " S_SMP "\n",
183 		 str, err, ++die_counter);
184 
185 	/* trap and error numbers are mostly meaningless on ARM */
186 	ret = notify_die(DIE_OOPS, str, regs, err, 0, SIGSEGV);
187 	if (ret == NOTIFY_STOP)
188 		return ret;
189 
190 	print_modules();
191 	show_regs(regs);
192 
193 	if (user_mode(regs))
194 		return ret;
195 
196 	dump_kernel_instr(addr);
197 
198 	return ret;
199 }
200 
201 static DEFINE_RAW_SPINLOCK(die_lock);
202 
203 /*
204  * This function is protected against re-entrancy.
205  */
206 void die(const char *str, struct pt_regs *regs, long err)
207 {
208 	int ret;
209 	unsigned long flags;
210 
211 	raw_spin_lock_irqsave(&die_lock, flags);
212 
213 	oops_enter();
214 
215 	console_verbose();
216 	bust_spinlocks(1);
217 	ret = __die(str, err, regs);
218 
219 	if (regs && kexec_should_crash(current))
220 		crash_kexec(regs);
221 
222 	bust_spinlocks(0);
223 	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
224 	oops_exit();
225 
226 	if (in_interrupt())
227 		panic("%s: Fatal exception in interrupt", str);
228 	if (panic_on_oops)
229 		panic("%s: Fatal exception", str);
230 
231 	raw_spin_unlock_irqrestore(&die_lock, flags);
232 
233 	if (ret != NOTIFY_STOP)
234 		make_task_dead(SIGSEGV);
235 }
236 
237 static void arm64_show_signal(int signo, const char *str)
238 {
239 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
240 				      DEFAULT_RATELIMIT_BURST);
241 	struct task_struct *tsk = current;
242 	unsigned long esr = tsk->thread.fault_code;
243 	struct pt_regs *regs = task_pt_regs(tsk);
244 
245 	/* Leave if the signal won't be shown */
246 	if (!show_unhandled_signals ||
247 	    !unhandled_signal(tsk, signo) ||
248 	    !__ratelimit(&rs))
249 		return;
250 
251 	pr_info("%s[%d]: unhandled exception: ", tsk->comm, task_pid_nr(tsk));
252 	if (esr)
253 		pr_cont("%s, ESR 0x%016lx, ", esr_get_class_string(esr), esr);
254 
255 	pr_cont("%s", str);
256 	print_vma_addr(KERN_CONT " in ", regs->pc);
257 	pr_cont("\n");
258 	__show_regs(regs);
259 }
260 
261 void arm64_force_sig_fault(int signo, int code, unsigned long far,
262 			   const char *str)
263 {
264 	arm64_show_signal(signo, str);
265 	if (signo == SIGKILL)
266 		force_sig(SIGKILL);
267 	else
268 		force_sig_fault(signo, code, (void __user *)far);
269 }
270 
271 void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey)
272 {
273 	arm64_show_signal(SIGSEGV, str);
274 	force_sig_pkuerr((void __user *)far, pkey);
275 }
276 
277 void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
278 			    const char *str)
279 {
280 	arm64_show_signal(SIGBUS, str);
281 	force_sig_mceerr(code, (void __user *)far, lsb);
282 }
283 
284 void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far,
285 				       const char *str)
286 {
287 	arm64_show_signal(SIGTRAP, str);
288 	force_sig_ptrace_errno_trap(errno, (void __user *)far);
289 }
290 
291 void arm64_notify_die(const char *str, struct pt_regs *regs,
292 		      int signo, int sicode, unsigned long far,
293 		      unsigned long err)
294 {
295 	if (user_mode(regs)) {
296 		WARN_ON(regs != current_pt_regs());
297 		current->thread.fault_address = 0;
298 		current->thread.fault_code = err;
299 
300 		arm64_force_sig_fault(signo, sicode, far, str);
301 	} else {
302 		die(str, regs, err);
303 	}
304 }
305 
306 #ifdef CONFIG_COMPAT
307 #define PSTATE_IT_1_0_SHIFT	25
308 #define PSTATE_IT_1_0_MASK	(0x3 << PSTATE_IT_1_0_SHIFT)
309 #define PSTATE_IT_7_2_SHIFT	10
310 #define PSTATE_IT_7_2_MASK	(0x3f << PSTATE_IT_7_2_SHIFT)
311 
312 static u32 compat_get_it_state(struct pt_regs *regs)
313 {
314 	u32 it, pstate = regs->pstate;
315 
316 	it  = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT;
317 	it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2;
318 
319 	return it;
320 }
321 
322 static void compat_set_it_state(struct pt_regs *regs, u32 it)
323 {
324 	u32 pstate_it;
325 
326 	pstate_it  = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK;
327 	pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK;
328 
329 	regs->pstate &= ~PSR_AA32_IT_MASK;
330 	regs->pstate |= pstate_it;
331 }
332 
333 static void advance_itstate(struct pt_regs *regs)
334 {
335 	u32 it;
336 
337 	/* ARM mode */
338 	if (!(regs->pstate & PSR_AA32_T_BIT) ||
339 	    !(regs->pstate & PSR_AA32_IT_MASK))
340 		return;
341 
342 	it  = compat_get_it_state(regs);
343 
344 	/*
345 	 * If this is the last instruction of the block, wipe the IT
346 	 * state. Otherwise advance it.
347 	 */
348 	if (!(it & 7))
349 		it = 0;
350 	else
351 		it = (it & 0xe0) | ((it << 1) & 0x1f);
352 
353 	compat_set_it_state(regs, it);
354 }
355 #else
/* Built when CONFIG_COMPAT is not set: intentionally does nothing. */
static void advance_itstate(struct pt_regs *regs)
{
}
359 #endif
360 
361 void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size)
362 {
363 	regs->pc += size;
364 
365 	/*
366 	 * If we were single stepping, we want to get the step exception after
367 	 * we return from the trap.
368 	 */
369 	if (user_mode(regs))
370 		user_fastforward_single_step(current);
371 
372 	if (compat_user_mode(regs))
373 		advance_itstate(regs);
374 	else
375 		regs->pstate &= ~PSR_BTYPE_MASK;
376 }
377 
378 static int user_insn_read(struct pt_regs *regs, u32 *insnp)
379 {
380 	u32 instr;
381 	unsigned long pc = instruction_pointer(regs);
382 
383 	if (compat_thumb_mode(regs)) {
384 		/* 16-bit Thumb instruction */
385 		__le16 instr_le;
386 		if (get_user(instr_le, (__le16 __user *)pc))
387 			return -EFAULT;
388 		instr = le16_to_cpu(instr_le);
389 		if (aarch32_insn_is_wide(instr)) {
390 			u32 instr2;
391 
392 			if (get_user(instr_le, (__le16 __user *)(pc + 2)))
393 				return -EFAULT;
394 			instr2 = le16_to_cpu(instr_le);
395 			instr = (instr << 16) | instr2;
396 		}
397 	} else {
398 		/* 32-bit ARM instruction */
399 		__le32 instr_le;
400 		if (get_user(instr_le, (__le32 __user *)pc))
401 			return -EFAULT;
402 		instr = le32_to_cpu(instr_le);
403 	}
404 
405 	*insnp = instr;
406 	return 0;
407 }
408 
409 void force_signal_inject(int signal, int code, unsigned long address, unsigned long err)
410 {
411 	const char *desc;
412 	struct pt_regs *regs = current_pt_regs();
413 
414 	if (WARN_ON(!user_mode(regs)))
415 		return;
416 
417 	switch (signal) {
418 	case SIGILL:
419 		desc = "undefined instruction";
420 		break;
421 	case SIGSEGV:
422 		desc = "illegal memory access";
423 		break;
424 	default:
425 		desc = "unknown or unrecoverable error";
426 		break;
427 	}
428 
429 	/* Force signals we don't understand to SIGKILL */
430 	if (WARN_ON(signal != SIGKILL &&
431 		    siginfo_layout(signal, code) != SIL_FAULT)) {
432 		signal = SIGKILL;
433 	}
434 
435 	arm64_notify_die(desc, regs, signal, code, address, err);
436 }
437 
438 /*
439  * Set up process info to signal segmentation fault - called on access error.
440  */
441 void arm64_notify_segfault(unsigned long addr)
442 {
443 	int code;
444 
445 	mmap_read_lock(current->mm);
446 	if (find_vma(current->mm, untagged_addr(addr)) == NULL)
447 		code = SEGV_MAPERR;
448 	else
449 		code = SEGV_ACCERR;
450 	mmap_read_unlock(current->mm);
451 
452 	force_signal_inject(SIGSEGV, code, addr, 0);
453 }
454 
455 void do_el0_undef(struct pt_regs *regs, unsigned long esr)
456 {
457 	u32 insn;
458 
459 	/* check for AArch32 breakpoint instructions */
460 	if (try_handle_aarch32_break(regs))
461 		return;
462 
463 	if (user_insn_read(regs, &insn))
464 		goto out_err;
465 
466 	if (try_emulate_mrs(regs, insn))
467 		return;
468 
469 	if (try_emulate_armv8_deprecated(regs, insn))
470 		return;
471 
472 out_err:
473 	force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
474 }
475 
476 void do_el1_undef(struct pt_regs *regs, unsigned long esr)
477 {
478 	u32 insn;
479 
480 	if (aarch64_insn_read((void *)regs->pc, &insn))
481 		goto out_err;
482 
483 	if (try_emulate_el1_ssbs(regs, insn))
484 		return;
485 
486 out_err:
487 	die("Oops - Undefined instruction", regs, esr);
488 }
489 
490 void do_el0_bti(struct pt_regs *regs)
491 {
492 	force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
493 }
494 
495 void do_el1_bti(struct pt_regs *regs, unsigned long esr)
496 {
497 	if (efi_runtime_fixup_exception(regs, "BTI violation")) {
498 		regs->pstate &= ~PSR_BTYPE_MASK;
499 		return;
500 	}
501 	die("Oops - BTI", regs, esr);
502 }
503 
504 void do_el0_gcs(struct pt_regs *regs, unsigned long esr)
505 {
506 	force_signal_inject(SIGSEGV, SEGV_CPERR, regs->pc, 0);
507 }
508 
/* GCS exception from EL1: there is no recovery path here, just oops. */
void do_el1_gcs(struct pt_regs *regs, unsigned long esr)
{
	die("Oops - GCS", regs, esr);
}
513 
514 void do_el0_fpac(struct pt_regs *regs, unsigned long esr)
515 {
516 	force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr);
517 }
518 
/* FPAC exception from EL1: always treated as fatal. */
void do_el1_fpac(struct pt_regs *regs, unsigned long esr)
{
	/*
	 * Unexpected FPAC exception in the kernel: kill the task before it
	 * does any more harm.
	 */
	die("Oops - FPAC", regs, esr);
}
527 
/*
 * MOPS exception from EL0: let arm64_mops_reset_regs() rewrite the user
 * registers according to @esr, then fast-forward any pending single step.
 */
void do_el0_mops(struct pt_regs *regs, unsigned long esr)
{
	arm64_mops_reset_regs(&regs->user_regs, esr);

	/*
	 * If single stepping then finish the step before executing the
	 * prologue instruction.
	 */
	user_fastforward_single_step(current);
}
538 
/*
 * MOPS exception from EL1: same register reset as the EL0 handler, followed
 * by the kernel-side single-step fast-forward.
 */
void do_el1_mops(struct pt_regs *regs, unsigned long esr)
{
	arm64_mops_reset_regs(&regs->user_regs, esr);

	kernel_fastforward_single_step(regs);
}
545 
/*
 * Issue the cache maintenance instruction @insn on user address @address.
 *
 * @res is set to -EFAULT, and no instruction is issued, when @address is at
 * or above TASK_SIZE_MAX. Otherwise the instruction runs between
 * uaccess_ttbr0_enable() and uaccess_ttbr0_disable(), and @res is set to 0
 * if it completes. A fault on the instruction is routed through the
 * exception table entry to label 2, skipping the "mov"; presumably the fixup
 * leaves the error code in @res (see _ASM_EXTABLE_UACCESS_ERR).
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and cannot capture a following "else". Note that
 * @address is evaluated twice.
 */
#define __user_cache_maint(insn, address, res)				\
	do {								\
		if ((address) >= TASK_SIZE_MAX) {			\
			(res) = -EFAULT;				\
		} else {						\
			uaccess_ttbr0_enable();				\
			asm volatile (					\
				"1:	" insn ", %1\n"			\
				"	mov	%w0, #0\n"		\
				"2:\n"					\
				_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0)	\
				: "=r" (res)				\
				: "r" (address));			\
			uaccess_ttbr0_disable();			\
		}							\
	} while (0)
560 
561 static void user_cache_maint_handler(unsigned long esr, struct pt_regs *regs)
562 {
563 	unsigned long tagged_address, address;
564 	int rt = ESR_ELx_SYS64_ISS_RT(esr);
565 	int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
566 	int ret = 0;
567 
568 	tagged_address = pt_regs_read_reg(regs, rt);
569 	address = untagged_addr(tagged_address);
570 
571 	switch (crm) {
572 	case ESR_ELx_SYS64_ISS_CRM_DC_CVAU:	/* DC CVAU, gets promoted */
573 		__user_cache_maint("dc civac", address, ret);
574 		break;
575 	case ESR_ELx_SYS64_ISS_CRM_DC_CVAC:	/* DC CVAC, gets promoted */
576 		__user_cache_maint("dc civac", address, ret);
577 		break;
578 	case ESR_ELx_SYS64_ISS_CRM_DC_CVADP:	/* DC CVADP */
579 		__user_cache_maint("sys 3, c7, c13, 1", address, ret);
580 		break;
581 	case ESR_ELx_SYS64_ISS_CRM_DC_CVAP:	/* DC CVAP */
582 		__user_cache_maint("sys 3, c7, c12, 1", address, ret);
583 		break;
584 	case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC:	/* DC CIVAC */
585 		__user_cache_maint("dc civac", address, ret);
586 		break;
587 	case ESR_ELx_SYS64_ISS_CRM_IC_IVAU:	/* IC IVAU */
588 		__user_cache_maint("ic ivau", address, ret);
589 		break;
590 	default:
591 		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
592 		return;
593 	}
594 
595 	if (ret)
596 		arm64_notify_segfault(tagged_address);
597 	else
598 		arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
599 }
600 
601 static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
602 {
603 	int rt = ESR_ELx_SYS64_ISS_RT(esr);
604 	unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
605 
606 	if (cpus_have_final_cap(ARM64_WORKAROUND_1542419)) {
607 		/* Hide DIC so that we can trap the unnecessary maintenance...*/
608 		val &= ~BIT(CTR_EL0_DIC_SHIFT);
609 
610 		/* ... and fake IminLine to reduce the number of traps. */
611 		val &= ~CTR_EL0_IminLine_MASK;
612 		val |= (PAGE_SHIFT - 2) & CTR_EL0_IminLine_MASK;
613 	}
614 
615 	pt_regs_write_reg(regs, rt, val);
616 
617 	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
618 }
619 
620 static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
621 {
622 	if (test_thread_flag(TIF_TSC_SIGSEGV)) {
623 		force_sig(SIGSEGV);
624 	} else {
625 		int rt = ESR_ELx_SYS64_ISS_RT(esr);
626 
627 		pt_regs_write_reg(regs, rt, arch_timer_read_counter());
628 		arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
629 	}
630 }
631 
632 static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
633 {
634 	if (test_thread_flag(TIF_TSC_SIGSEGV)) {
635 		force_sig(SIGSEGV);
636 	} else {
637 		int rt = ESR_ELx_SYS64_ISS_RT(esr);
638 
639 		pt_regs_write_reg(regs, rt, arch_timer_get_rate());
640 		arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
641 	}
642 }
643 
644 static void mrs_handler(unsigned long esr, struct pt_regs *regs)
645 {
646 	u32 sysreg, rt;
647 
648 	rt = ESR_ELx_SYS64_ISS_RT(esr);
649 	sysreg = esr_sys64_to_sysreg(esr);
650 
651 	if (do_emulate_mrs(regs, sysreg, rt) != 0)
652 		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
653 }
654 
/* A trapped EL0 WFI is not emulated: the instruction is simply skipped. */
static void wfi_handler(unsigned long esr, struct pt_regs *regs)
{
	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
659 
/*
 * One entry in a table of trapped-instruction handlers.
 *
 * The lookup loops in this file call @handler for the first entry where
 * (esr & esr_mask) == esr_val; an entry with a NULL handler ends the table.
 */
struct sys64_hook {
	/* ESR bits that take part in the match */
	unsigned long esr_mask;
	/* value the masked ESR must equal */
	unsigned long esr_val;
	/* called with the full ESR and the trapping context */
	void (*handler)(unsigned long esr, struct pt_regs *regs);
};
665 
666 static const struct sys64_hook sys64_hooks[] = {
667 	{
668 		.esr_mask = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK,
669 		.esr_val = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL,
670 		.handler = user_cache_maint_handler,
671 	},
672 	{
673 		/* Trap read access to CTR_EL0 */
674 		.esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
675 		.esr_val = ESR_ELx_SYS64_ISS_SYS_CTR_READ,
676 		.handler = ctr_read_handler,
677 	},
678 	{
679 		/* Trap read access to CNTVCT_EL0 */
680 		.esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
681 		.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCT,
682 		.handler = cntvct_read_handler,
683 	},
684 	{
685 		/* Trap read access to CNTVCTSS_EL0 */
686 		.esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
687 		.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCTSS,
688 		.handler = cntvct_read_handler,
689 	},
690 	{
691 		/* Trap read access to CNTFRQ_EL0 */
692 		.esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
693 		.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ,
694 		.handler = cntfrq_read_handler,
695 	},
696 	{
697 		/* Trap read access to CPUID registers */
698 		.esr_mask = ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK,
699 		.esr_val = ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL,
700 		.handler = mrs_handler,
701 	},
702 	{
703 		/* Trap WFI instructions executed in userspace */
704 		.esr_mask = ESR_ELx_WFx_MASK,
705 		.esr_val = ESR_ELx_WFx_WFI_VAL,
706 		.handler = wfi_handler,
707 	},
708 	{},
709 };
710 
711 #ifdef CONFIG_COMPAT
712 static bool cp15_cond_valid(unsigned long esr, struct pt_regs *regs)
713 {
714 	int cond;
715 
716 	/* Only a T32 instruction can trap without CV being set */
717 	if (!(esr & ESR_ELx_CV)) {
718 		u32 it;
719 
720 		it = compat_get_it_state(regs);
721 		if (!it)
722 			return true;
723 
724 		cond = it >> 4;
725 	} else {
726 		cond = (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT;
727 	}
728 
729 	return aarch32_opcode_cond_checks[cond](regs->pstate);
730 }
731 
732 static void compat_cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
733 {
734 	int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT;
735 
736 	pt_regs_write_reg(regs, reg, arch_timer_get_rate());
737 	arm64_skip_faulting_instruction(regs, 4);
738 }
739 
740 static const struct sys64_hook cp15_32_hooks[] = {
741 	{
742 		.esr_mask = ESR_ELx_CP15_32_ISS_SYS_MASK,
743 		.esr_val = ESR_ELx_CP15_32_ISS_SYS_CNTFRQ,
744 		.handler = compat_cntfrq_read_handler,
745 	},
746 	{},
747 };
748 
749 static void compat_cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
750 {
751 	int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT;
752 	int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT;
753 	u64 val = arch_timer_read_counter();
754 
755 	pt_regs_write_reg(regs, rt, lower_32_bits(val));
756 	pt_regs_write_reg(regs, rt2, upper_32_bits(val));
757 	arm64_skip_faulting_instruction(regs, 4);
758 }
759 
760 static const struct sys64_hook cp15_64_hooks[] = {
761 	{
762 		.esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK,
763 		.esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT,
764 		.handler = compat_cntvct_read_handler,
765 	},
766 	{
767 		.esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK,
768 		.esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS,
769 		.handler = compat_cntvct_read_handler,
770 	},
771 	{},
772 };
773 
774 void do_el0_cp15(unsigned long esr, struct pt_regs *regs)
775 {
776 	const struct sys64_hook *hook, *hook_base;
777 
778 	if (!cp15_cond_valid(esr, regs)) {
779 		/*
780 		 * There is no T16 variant of a CP access, so we
781 		 * always advance PC by 4 bytes.
782 		 */
783 		arm64_skip_faulting_instruction(regs, 4);
784 		return;
785 	}
786 
787 	switch (ESR_ELx_EC(esr)) {
788 	case ESR_ELx_EC_CP15_32:
789 		hook_base = cp15_32_hooks;
790 		break;
791 	case ESR_ELx_EC_CP15_64:
792 		hook_base = cp15_64_hooks;
793 		break;
794 	default:
795 		do_el0_undef(regs, esr);
796 		return;
797 	}
798 
799 	for (hook = hook_base; hook->handler; hook++)
800 		if ((hook->esr_mask & esr) == hook->esr_val) {
801 			hook->handler(esr, regs);
802 			return;
803 		}
804 
805 	/*
806 	 * New cp15 instructions may previously have been undefined at
807 	 * EL0. Fall back to our usual undefined instruction handler
808 	 * so that we handle these consistently.
809 	 */
810 	do_el0_undef(regs, esr);
811 }
812 #endif
813 
814 void do_el0_sys(unsigned long esr, struct pt_regs *regs)
815 {
816 	const struct sys64_hook *hook;
817 
818 	for (hook = sys64_hooks; hook->handler; hook++)
819 		if ((hook->esr_mask & esr) == hook->esr_val) {
820 			hook->handler(esr, regs);
821 			return;
822 		}
823 
824 	/*
825 	 * New SYS instructions may previously have been undefined at EL0. Fall
826 	 * back to our usual undefined instruction handler so that we handle
827 	 * these consistently.
828 	 */
829 	do_el0_undef(regs, esr);
830 }
831 
832 static const char *esr_class_str[] = {
833 	[0 ... ESR_ELx_EC_MAX]		= "UNRECOGNIZED EC",
834 	[ESR_ELx_EC_UNKNOWN]		= "Unknown/Uncategorized",
835 	[ESR_ELx_EC_WFx]		= "WFI/WFE",
836 	[ESR_ELx_EC_CP15_32]		= "CP15 MCR/MRC",
837 	[ESR_ELx_EC_CP15_64]		= "CP15 MCRR/MRRC",
838 	[ESR_ELx_EC_CP14_MR]		= "CP14 MCR/MRC",
839 	[ESR_ELx_EC_CP14_LS]		= "CP14 LDC/STC",
840 	[ESR_ELx_EC_FP_ASIMD]		= "ASIMD",
841 	[ESR_ELx_EC_CP10_ID]		= "CP10 MRC/VMRS",
842 	[ESR_ELx_EC_PAC]		= "PAC",
843 	[ESR_ELx_EC_CP14_64]		= "CP14 MCRR/MRRC",
844 	[ESR_ELx_EC_BTI]		= "BTI",
845 	[ESR_ELx_EC_ILL]		= "PSTATE.IL",
846 	[ESR_ELx_EC_SVC32]		= "SVC (AArch32)",
847 	[ESR_ELx_EC_HVC32]		= "HVC (AArch32)",
848 	[ESR_ELx_EC_SMC32]		= "SMC (AArch32)",
849 	[ESR_ELx_EC_SVC64]		= "SVC (AArch64)",
850 	[ESR_ELx_EC_HVC64]		= "HVC (AArch64)",
851 	[ESR_ELx_EC_SMC64]		= "SMC (AArch64)",
852 	[ESR_ELx_EC_SYS64]		= "MSR/MRS (AArch64)",
853 	[ESR_ELx_EC_SVE]		= "SVE",
854 	[ESR_ELx_EC_ERET]		= "ERET/ERETAA/ERETAB",
855 	[ESR_ELx_EC_FPAC]		= "FPAC",
856 	[ESR_ELx_EC_SME]		= "SME",
857 	[ESR_ELx_EC_IMP_DEF]		= "EL3 IMP DEF",
858 	[ESR_ELx_EC_IABT_LOW]		= "IABT (lower EL)",
859 	[ESR_ELx_EC_IABT_CUR]		= "IABT (current EL)",
860 	[ESR_ELx_EC_PC_ALIGN]		= "PC Alignment",
861 	[ESR_ELx_EC_DABT_LOW]		= "DABT (lower EL)",
862 	[ESR_ELx_EC_DABT_CUR]		= "DABT (current EL)",
863 	[ESR_ELx_EC_SP_ALIGN]		= "SP Alignment",
864 	[ESR_ELx_EC_MOPS]		= "MOPS",
865 	[ESR_ELx_EC_FP_EXC32]		= "FP (AArch32)",
866 	[ESR_ELx_EC_FP_EXC64]		= "FP (AArch64)",
867 	[ESR_ELx_EC_GCS]		= "Guarded Control Stack",
868 	[ESR_ELx_EC_SERROR]		= "SError",
869 	[ESR_ELx_EC_BREAKPT_LOW]	= "Breakpoint (lower EL)",
870 	[ESR_ELx_EC_BREAKPT_CUR]	= "Breakpoint (current EL)",
871 	[ESR_ELx_EC_SOFTSTP_LOW]	= "Software Step (lower EL)",
872 	[ESR_ELx_EC_SOFTSTP_CUR]	= "Software Step (current EL)",
873 	[ESR_ELx_EC_WATCHPT_LOW]	= "Watchpoint (lower EL)",
874 	[ESR_ELx_EC_WATCHPT_CUR]	= "Watchpoint (current EL)",
875 	[ESR_ELx_EC_BKPT32]		= "BKPT (AArch32)",
876 	[ESR_ELx_EC_VECTOR32]		= "Vector catch (AArch32)",
877 	[ESR_ELx_EC_BRK64]		= "BRK (AArch64)",
878 };
879 
880 const char *esr_get_class_string(unsigned long esr)
881 {
882 	return esr_class_str[ESR_ELx_EC(esr)];
883 }
884 
885 /*
886  * bad_el0_sync handles unexpected, but potentially recoverable synchronous
887  * exceptions taken from EL0.
888  */
889 void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr)
890 {
891 	unsigned long pc = instruction_pointer(regs);
892 
893 	current->thread.fault_address = 0;
894 	current->thread.fault_code = esr;
895 
896 	arm64_force_sig_fault(SIGILL, ILL_ILLOPC, pc,
897 			      "Bad EL0 synchronous exception");
898 }
899 
/*
 * Per-CPU array of OVERFLOW_STACK_SIZE bytes, aligned to 16 bytes.
 * panic_bad_stack() reports its bounds as the "Overflow stack".
 */
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
	__aligned(16);
902 
903 void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far)
904 {
905 	unsigned long tsk_stk = (unsigned long)current->stack;
906 	unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
907 	unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
908 
909 	console_verbose();
910 	pr_emerg("Insufficient stack space to handle exception!");
911 
912 	pr_emerg("ESR: 0x%016lx -- %s\n", esr, esr_get_class_string(esr));
913 	pr_emerg("FAR: 0x%016lx\n", far);
914 
915 	pr_emerg("Task stack:     [0x%016lx..0x%016lx]\n",
916 		 tsk_stk, tsk_stk + THREAD_SIZE);
917 	pr_emerg("IRQ stack:      [0x%016lx..0x%016lx]\n",
918 		 irq_stk, irq_stk + IRQ_STACK_SIZE);
919 	pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n",
920 		 ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE);
921 
922 	__show_regs(regs);
923 
924 	/*
925 	 * We use nmi_panic to limit the potential for recusive overflows, and
926 	 * to get a better stack trace.
927 	 */
928 	nmi_panic(NULL, "kernel stack overflow");
929 	cpu_park_loop();
930 }
931 
932 void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr)
933 {
934 	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
935 	console_verbose();
936 
937 	pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n",
938 		smp_processor_id(), esr, esr_get_class_string(esr));
939 	if (regs)
940 		__show_regs(regs);
941 
942 	nmi_panic(regs, "Asynchronous SError Interrupt");
943 
944 	cpu_park_loop();
945 }
946 
947 bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr)
948 {
949 	unsigned long aet = arm64_ras_serror_get_severity(esr);
950 
951 	switch (aet) {
952 	case ESR_ELx_AET_CE:	/* corrected error */
953 	case ESR_ELx_AET_UEO:	/* restartable, not yet consumed */
954 		/*
955 		 * The CPU can make progress. We may take UEO again as
956 		 * a more severe error.
957 		 */
958 		return false;
959 
960 	case ESR_ELx_AET_UEU:	/* Uncorrected Unrecoverable */
961 	case ESR_ELx_AET_UER:	/* Uncorrected Recoverable */
962 		/*
963 		 * The CPU can't make progress. The exception may have
964 		 * been imprecise.
965 		 *
966 		 * Neoverse-N1 #1349291 means a non-KVM SError reported as
967 		 * Unrecoverable should be treated as Uncontainable. We
968 		 * call arm64_serror_panic() in both cases.
969 		 */
970 		return true;
971 
972 	case ESR_ELx_AET_UC:	/* Uncontainable or Uncategorized error */
973 	default:
974 		/* Error has been silently propagated */
975 		arm64_serror_panic(regs, esr);
976 	}
977 }
978 
/*
 * SError entry point: return only for a RAS error that
 * arm64_is_fatal_ras_serror() judges survivable; panic otherwise.
 */
void do_serror(struct pt_regs *regs, unsigned long esr)
{
	if (arm64_is_ras_serror(esr) && !arm64_is_fatal_ras_serror(regs, esr))
		return;

	/* non-RAS errors are not containable */
	arm64_serror_panic(regs, esr);
}
985 
986 /* GENERIC_BUG traps */
987 #ifdef CONFIG_GENERIC_BUG
/* Accepts every @addr unconditionally; see the rationale below. */
int is_valid_bugaddr(unsigned long addr)
{
	/*
	 * bug_brk_handler() only called for BRK #BUG_BRK_IMM.
	 * So the answer is trivial -- any spurious instances with no
	 * bug table entry will be rejected by report_bug() and passed
	 * back to the debug-monitors code and handled as a fatal
	 * unexpected debug exception.
	 */
	return 1;
}
999 #endif
1000 
1001 int bug_brk_handler(struct pt_regs *regs, unsigned long esr)
1002 {
1003 	switch (report_bug(regs->pc, regs)) {
1004 	case BUG_TRAP_TYPE_BUG:
1005 		die("Oops - BUG", regs, esr);
1006 		break;
1007 
1008 	case BUG_TRAP_TYPE_WARN:
1009 		break;
1010 
1011 	default:
1012 		/* unknown/unrecognised bug trap type */
1013 		return DBG_HOOK_ERROR;
1014 	}
1015 
1016 	/* If thread survives, skip over the BUG instruction and continue: */
1017 	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
1018 	return DBG_HOOK_HANDLED;
1019 }
1020 
1021 #ifdef CONFIG_CFI
1022 int cfi_brk_handler(struct pt_regs *regs, unsigned long esr)
1023 {
1024 	unsigned long target;
1025 	u32 type;
1026 
1027 	target = pt_regs_read_reg(regs, FIELD_GET(CFI_BRK_IMM_TARGET, esr));
1028 	type = (u32)pt_regs_read_reg(regs, FIELD_GET(CFI_BRK_IMM_TYPE, esr));
1029 
1030 	switch (report_cfi_failure(regs, regs->pc, &target, type)) {
1031 	case BUG_TRAP_TYPE_BUG:
1032 		die("Oops - CFI", regs, esr);
1033 		break;
1034 
1035 	case BUG_TRAP_TYPE_WARN:
1036 		break;
1037 
1038 	default:
1039 		return DBG_HOOK_ERROR;
1040 	}
1041 
1042 	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
1043 	return DBG_HOOK_HANDLED;
1044 }
1045 #endif /* CONFIG_CFI */
1046 
1047 int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr)
1048 {
1049 	pr_err("%s generated an invalid instruction at %pS!\n",
1050 		"Kernel text patching",
1051 		(void *)instruction_pointer(regs));
1052 
1053 	/* We cannot handle this */
1054 	return DBG_HOOK_ERROR;
1055 }
1056 
1057 #ifdef CONFIG_KASAN_SW_TAGS
1058 
1059 #define KASAN_ESR_RECOVER	0x20
1060 #define KASAN_ESR_WRITE	0x10
1061 #define KASAN_ESR_SIZE_MASK	0x0f
1062 #define KASAN_ESR_SIZE(esr)	(1 << ((esr) & KASAN_ESR_SIZE_MASK))
1063 
1064 int kasan_brk_handler(struct pt_regs *regs, unsigned long esr)
1065 {
1066 	bool recover = esr & KASAN_ESR_RECOVER;
1067 	bool write = esr & KASAN_ESR_WRITE;
1068 	size_t size = KASAN_ESR_SIZE(esr);
1069 	void *addr = (void *)regs->regs[0];
1070 	u64 pc = regs->pc;
1071 
1072 	kasan_report(addr, size, write, pc);
1073 
1074 	/*
1075 	 * The instrumentation allows to control whether we can proceed after
1076 	 * a crash was detected. This is done by passing the -recover flag to
1077 	 * the compiler. Disabling recovery allows to generate more compact
1078 	 * code.
1079 	 *
1080 	 * Unfortunately disabling recovery doesn't work for the kernel right
1081 	 * now. KASAN reporting is disabled in some contexts (for example when
1082 	 * the allocator accesses slab object metadata; this is controlled by
1083 	 * current->kasan_depth). All these accesses are detected by the tool,
1084 	 * even though the reports for them are not printed.
1085 	 *
1086 	 * This is something that might be fixed at some point in the future.
1087 	 */
1088 	if (!recover)
1089 		die("Oops - KASAN", regs, esr);
1090 
1091 	/* If thread survives, skip over the brk instruction and continue: */
1092 	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
1093 	return DBG_HOOK_HANDLED;
1094 }
1095 #endif
1096 
1097 #ifdef CONFIG_UBSAN_TRAP
1098 int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr)
1099 {
1100 	die(report_ubsan_failure(esr & UBSAN_BRK_MASK), regs, esr);
1101 	return DBG_HOOK_HANDLED;
1102 }
1103 #endif
1104