xref: /linux/arch/powerpc/kernel/syscall.c (revision b6b1334c9510e162bd8ca0ae58403cafad9572f1)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 
3 #include <linux/compat.h>
4 #include <linux/context_tracking.h>
5 #include <linux/randomize_kstack.h>
6 
7 #include <asm/interrupt.h>
8 #include <asm/kup.h>
9 #include <asm/syscall.h>
10 #include <asm/time.h>
11 #include <asm/tm.h>
12 #include <asm/unistd.h>
13 
14 
/*
 * Signature through which system_call_exception() invokes the handler it
 * picks from the syscall table: six long arguments in, one long result out.
 * Table entries are cast to this type at the lookup site.
 */
15 typedef long (*syscall_fn)(long, long, long, long, long, long);
16 
/*
 * system_call_exception() - C-level handling of a system call entry.
 *
 * r3..r8: the six syscall arguments. When syscall-entry tracing is active
 *         they are re-read from regs->gpr[3..8] after the tracer has run;
 *         for compat tasks only their low 32 bits are passed on.
 * r0:     the syscall number used to index the syscall table. When tracing
 *         is active it is replaced by the value returned from
 *         do_syscall_trace_enter().
 * regs:   the register frame of the interrupted context; asserted below to
 *         be recoverable, to have MSR_PR set and to not have interrupts
 *         disabled.
 *
 * Returns the selected handler's return value on the normal path. Returns
 * -ENOSYS when r0 is out of range (untraced case) or when the syscall was
 * issued with a TM transaction active. Returns regs->gpr[3] when the tracer
 * handed back an out-of-range syscall number or when an unsupported scv
 * vector was turned into SIGILL.
 */
17 /* Has to run notrace because it is entered not completely "reconciled" */
18 notrace long system_call_exception(long r3, long r4, long r5,
19 				   long r6, long r7, long r8,
20 				   unsigned long r0, struct pt_regs *regs)
21 {
22 	long ret;
23 	syscall_fn f;
24 
25 	kuap_lock();
26 
27 	add_random_kstack_offset();
28 
29 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
30 		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
31 
32 	trace_hardirqs_off(); /* finish reconciling */
33 
34 	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
35 	user_exit_irqoff();
36 
	/*
	 * Sanity checks on the frame we were entered with; violating any of
	 * them is fatal (BUG).
	 */
37 	BUG_ON(regs_is_unrecoverable(regs));
38 	BUG_ON(!(regs->msr & MSR_PR));
39 	BUG_ON(arch_irq_disabled_regs(regs));
40 
41 #ifdef CONFIG_PPC_PKEY
	/*
	 * With the pkey MMU feature: record the AMR/IAMR values found on
	 * entry in regs, then write the "blocked" values for whichever of
	 * KUAP/KUEP the MMU reports. A single isync() follows if either
	 * register was written.
	 */
42 	if (mmu_has_feature(MMU_FTR_PKEY)) {
43 		unsigned long amr, iamr;
44 		bool flush_needed = false;
45 		/*
46 		 * When entering from userspace we mostly have the AMR/IAMR
47 		 * different from kernel default values. Hence don't compare.
48 		 */
49 		amr = mfspr(SPRN_AMR);
50 		iamr = mfspr(SPRN_IAMR);
51 		regs->amr  = amr;
52 		regs->iamr = iamr;
53 		if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
54 			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
55 			flush_needed = true;
56 		}
57 		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
58 			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
59 			flush_needed = true;
60 		}
61 		if (flush_needed)
62 			isync();
63 	} else
64 #endif
		/*
		 * With CONFIG_PPC_PKEY this statement is the body of the
		 * "else" above (MMU without the pkey feature); without
		 * CONFIG_PPC_PKEY it runs unconditionally.
		 */
65 		kuap_assert_locked();
66 
67 	booke_restore_dbcr0();
68 
69 	account_cpu_user_entry();
70 
71 	account_stolen_time();
72 
73 	/*
74 	 * This is not required for the syscall exit path, but makes the
75 	 * stack frame look nicer. If this was initialised in the first stack
76 	 * frame, or if the unwinder was taught the first stack frame always
77 	 * returns to user with IRQS_ENABLED, this store could be avoided!
78 	 */
79 	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
80 
81 	/*
82 	 * If system call is called with TM active, set _TIF_RESTOREALL to
83 	 * prevent RFSCV being used to return to userspace, because POWER9
84 	 * TM implementation has problems with this instruction returning to
85 	 * transactional state. Final register values are not relevant because
86 	 * the transaction will be aborted upon return anyway. Or in the case
87 	 * of unsupported_scv SIGILL fault, the return state does not much
88 	 * matter because it's an edge case.
89 	 */
90 	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
91 			unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
92 		set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
93 
94 	/*
95 	 * If the system call was made with a transaction active, doom it and
96 	 * return without performing the system call. Unless it was an
97 	 * unsupported scv vector, in which case it's treated like an illegal
98 	 * instruction.
99 	 */
100 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
101 	if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
102 	    !trap_is_unsupported_scv(regs)) {
103 		/* Enable TM in the kernel, and disable EE (for scv) */
104 		hard_irq_disable();
105 		mtmsr(mfmsr() | MSR_TM);
106 
107 		/* tabort, this dooms the transaction, nothing else */
108 		asm volatile(".long 0x7c00071d | ((%0) << 16)"
109 				:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
110 
111 		/*
112 		 * Userspace will never see the return value. Execution will
113 		 * resume after the tbegin. of the aborted transaction with the
114 		 * checkpointed register state. A context switch could occur
115 		 * or signal delivered to the process before resuming the
116 		 * doomed transaction context, but that should all be handled
117 		 * as expected.
118 		 */
119 		return -ENOSYS;
120 	}
121 #endif // CONFIG_PPC_TRANSACTIONAL_MEM
122 
123 	local_irq_enable();
124 
125 	if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
126 		if (unlikely(trap_is_unsupported_scv(regs))) {
127 			/* Unsupported scv vector */
128 			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
129 			return regs->gpr[3];
130 		}
131 		/*
132 		 * We use the return value of do_syscall_trace_enter() as the
133 		 * syscall number. If the syscall was rejected for any reason
134 		 * do_syscall_trace_enter() returns an invalid syscall number
135 		 * and the test against NR_syscalls will fail and the return
136 		 * value to be used is in regs->gpr[3].
137 		 */
138 		r0 = do_syscall_trace_enter(regs);
139 		if (unlikely(r0 >= NR_syscalls))
140 			return regs->gpr[3];
		/*
		 * From here on use the argument values held in regs rather
		 * than the ones passed in (presumably because the tracer may
		 * have changed them - confirm against
		 * do_syscall_trace_enter()).
		 */
141 		r3 = regs->gpr[3];
142 		r4 = regs->gpr[4];
143 		r5 = regs->gpr[5];
144 		r6 = regs->gpr[6];
145 		r7 = regs->gpr[7];
146 		r8 = regs->gpr[8];
147 
148 	} else if (unlikely(r0 >= NR_syscalls)) {
		/* Not being traced and the syscall number is out of range. */
149 		if (unlikely(trap_is_unsupported_scv(regs))) {
150 			/* Unsupported scv vector */
151 			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
152 			return regs->gpr[3];
153 		}
154 		return -ENOSYS;
155 	}
156 
	/*
	 * Every path that reaches this point has seen r0 compare below
	 * NR_syscalls; the barrier sits between that check and the use of
	 * r0 as a table index.
	 */
157 	/* May be faster to do array_index_nospec? */
158 	barrier_nospec();
159 
160 	if (unlikely(is_compat_task())) {
161 		f = (void *)compat_sys_call_table[r0];
162 
		/* Compat tasks: keep only the low 32 bits of each argument. */
163 		r3 &= 0x00000000ffffffffULL;
164 		r4 &= 0x00000000ffffffffULL;
165 		r5 &= 0x00000000ffffffffULL;
166 		r6 &= 0x00000000ffffffffULL;
167 		r7 &= 0x00000000ffffffffULL;
168 		r8 &= 0x00000000ffffffffULL;
169 
170 	} else {
171 		f = (void *)sys_call_table[r0];
172 	}
173 
	/* Invoke the handler selected above with the six arguments. */
174 	ret = f(r3, r4, r5, r6, r7, r8);
175 
176 	/*
177 	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
178 	 * so the maximum stack offset is 1k bytes (10 bits).
179 	 *
180 	 * The actual entropy will be further reduced by the compiler when
181 	 * applying stack alignment constraints: the powerpc architecture
182 	 * may have two kinds of stack alignment (16-bytes and 8-bytes).
183 	 *
184 	 * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3].
185 	 */
186 	choose_random_kstack_offset(mftb());
187 
188 	return ret;
189 }
190