// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999
 *    Author(s): Hartmut Penner (hp@de.ibm.com)
 *               Ulrich Weigand (uweigand@de.ibm.com)
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1995  Linus Torvalds
 */

#include <linux/kernel_stat.h>
#include <linux/mmu_context.h>
#include <linux/cpufeature.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/compat.h>
#include <linux/smp.h>
#include <linux/kdebug.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/extable.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <linux/kfence.h>
#include <linux/pagewalk.h>
#include <asm/asm-extable.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/fault.h>
#include <asm/diag.h>
#include <asm/irq.h>
#include <asm/facility.h>
#include <asm/uv.h>
#include "../kernel/entry.h"

/*
 * Find out which address space caused the exception.
 */
static bool is_kernel_fault(struct pt_regs *regs)
{
	union teid teid = { .val = regs->int_parm_long };

	if (user_mode(regs))
		return false;
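	/*
	 * The kernel accesses user address space via the secondary
	 * ASCE (uaccess); such faults are user space faults.
	 */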
	if (teid.as == PSW_BITS_AS_SECONDARY)
		return false;
	return true;
}

static unsigned long get_fault_address(struct pt_regs *regs)
{
	union teid teid = { .val = regs->int_parm_long };

	return teid.addr * PAGE_SIZE;
}

static __always_inline bool fault_is_write(struct pt_regs *regs)
{
	union teid teid = { .val = regs->int_parm_long };

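	/*
	 * The fetch/store indication (FSI) bits of the TEID are only
	 * valid if the access-exception fetch/store-indication facility
	 * (facility bit 75) is installed.
	 */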
	if (test_facility(75))
		return teid.fsi == TEID_FSI_STORE;
	return false;
}

static void dump_pagetable(unsigned long asce, unsigned long address)
{
	unsigned long entry, *table = __va(asce & _ASCE_ORIGIN);

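	/*
	 * Walk the page tables from the top level indicated by the ASCE
	 * down to the page table entry, printing each entry on the way.
	 * Stop early at invalid or large (huge mapping) entries; read
	 * every entry with get_kernel_nofault(), since the tables may
	 * be inconsistent on this error path.
	 */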
	pr_alert("AS:%016lx ", asce);
	switch (asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_REGION1:
		table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
		if (get_kernel_nofault(entry, table))
			goto bad;
		pr_cont("R1:%016lx ", entry);
		if (entry & _REGION_ENTRY_INVALID)
			goto out;
		table = __va(entry & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION2:
		table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
		if (get_kernel_nofault(entry, table))
			goto bad;
		pr_cont("R2:%016lx ", entry);
		if (entry & _REGION_ENTRY_INVALID)
			goto out;
		table = __va(entry & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION3:
		table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
		if (get_kernel_nofault(entry, table))
			goto bad;
		pr_cont("R3:%016lx ", entry);
		if (entry & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
			goto out;
		table = __va(entry & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_SEGMENT:
		table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
		if (get_kernel_nofault(entry, table))
			goto bad;
		pr_cont("S:%016lx ", entry);
		if (entry & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
			goto out;
		table = __va(entry & _SEGMENT_ENTRY_ORIGIN);
	}
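	/* Lowest level: the page table entry itself. */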
	table += (address & _PAGE_INDEX) >> PAGE_SHIFT;
	if (get_kernel_nofault(entry, table))
		goto bad;
	pr_cont("P:%016lx ", entry);
out:
	pr_cont("\n");
	return;
bad:
	pr_cont("BAD\n");
}

static void dump_fault_info(struct pt_regs *regs)
{
	union teid teid = { .val = regs->int_parm_long };
	unsigned long asce;

	pr_alert("Failing address: %016lx TEID: %016lx\n",
		 get_fault_address(regs), teid.val);
	pr_alert("Fault in ");
	switch (teid.as) {
	case PSW_BITS_AS_HOME:
		pr_cont("home space ");
		break;
	case PSW_BITS_AS_SECONDARY:
		pr_cont("secondary space ");
		break;
	case PSW_BITS_AS_ACCREG:
		pr_cont("access register ");
		break;
	case PSW_BITS_AS_PRIMARY:
		pr_cont("primary space ");
		break;
	}
	pr_cont("mode while using ");
	if (is_kernel_fault(regs)) {
		asce = get_lowcore()->kernel_asce.val;
		pr_cont("kernel ");
	} else {
		asce = get_lowcore()->user_asce.val;
		pr_cont("user ");
	}
	pr_cont("ASCE.\n");
	dump_pagetable(asce, get_fault_address(regs));
}

int show_unhandled_signals = 1;

static const struct ctl_table s390_fault_sysctl_table[] = {
	{
		.procname	= "userprocess_debug",
		.data		= &show_unhandled_signals,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
};

static int __init init_s390_fault_sysctls(void)
{
	register_sysctl_init("kernel", s390_fault_sysctl_table);
	return 0;
}
arch_initcall(init_s390_fault_sysctls);

void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
{
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);

	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
		return;
	if (!unhandled_signal(current, signr))
		return;
	if (!__ratelimit(&rs))
		return;
	pr_alert("User process fault: interruption code %04x ilc:%d ",
		 regs->int_code & 0xffff, regs->int_code >> 17);
	print_vma_addr(KERN_CONT "in ", regs->psw.addr);
	pr_cont("\n");
	if (is_mm_fault)
		dump_fault_info(regs);
	show_regs(regs);
}

static void do_sigsegv(struct pt_regs *regs, int si_code)
{
	report_user_fault(regs, SIGSEGV, 1);
	force_sig_fault(SIGSEGV, si_code, (void __user *)get_fault_address(regs));
}

static void handle_fault_error_nolock(struct pt_regs *regs, int si_code)
{
	unsigned long address;
	bool is_write;

	if (user_mode(regs)) {
		if (WARN_ON_ONCE(!si_code))
			si_code = SEGV_MAPERR;
		return do_sigsegv(regs, si_code);
	}
	if (fixup_exception(regs))
		return;
	if (is_kernel_fault(regs)) {
		address = get_fault_address(regs);
		is_write = fault_is_write(regs);
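		/* Let KFENCE claim a guard-page hit before declaring an oops. */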
		if (kfence_handle_page_fault(address, is_write, regs))
			return;
		pr_alert("Unable to handle kernel pointer dereference in virtual kernel address space\n");
	} else {
		pr_alert("Unable to handle kernel paging request in virtual user address space\n");
	}
	dump_fault_info(regs);
	die(regs, "Oops");
}

static void handle_fault_error(struct pt_regs *regs, int si_code)
{
	struct mm_struct *mm = current->mm;

	mmap_read_unlock(mm);
	handle_fault_error_nolock(regs, si_code);
}

static void do_sigbus(struct pt_regs *regs)
{
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)get_fault_address(regs));
}

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * interruption code (int_code):
 *	04	Protection		-> Write-Protection	(suppression)
 *	10	Segment translation	-> Not present		(nullification)
 *	11	Page translation	-> Not present		(nullification)
 *	3b	Region third trans.	-> Not present		(nullification)
 */
static void do_exception(struct pt_regs *regs, int access)
{
	struct vm_area_struct *vma;
	unsigned long address;
	struct mm_struct *mm;
	unsigned int flags;
	vm_fault_t fault;
	bool is_write;

	/*
	 * The instruction that caused the program check has
	 * been nullified. Don't signal single step via SIGTRAP.
	 */
	clear_thread_flag(TIF_PER_TRAP);
	if (kprobe_page_fault(regs, 14))
		return;
	mm = current->mm;
	address = get_fault_address(regs);
	is_write = fault_is_write(regs);
	if (is_kernel_fault(regs) || faulthandler_disabled() || !mm)
		return handle_fault_error_nolock(regs, 0);
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
	flags = FAULT_FLAG_DEFAULT;
	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;
	if (is_write)
		access = VM_WRITE;
	if (access == VM_WRITE)
		flags |= FAULT_FLAG_WRITE;
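	/*
	 * Fast path: try to resolve the fault with just the per-VMA
	 * lock, avoiding mmap_lock contention. Only faults from user
	 * mode take this path; kernel-mode uaccess faults fall back
	 * to the mmap_lock path below.
	 */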
	if (!(flags & FAULT_FLAG_USER))
		goto lock_mmap;
	vma = lock_vma_under_rcu(mm, address);
	if (!vma)
		goto lock_mmap;
	if (!(vma->vm_flags & access)) {
		vma_end_read(vma);
		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
		return handle_fault_error_nolock(regs, SEGV_ACCERR);
	}
	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
		vma_end_read(vma);
	if (!(fault & VM_FAULT_RETRY)) {
		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
		goto done;
	}
	count_vm_vma_lock_event(VMA_LOCK_RETRY);
	if (fault & VM_FAULT_MAJOR)
		flags |= FAULT_FLAG_TRIED;
	/* Quick path to respond to signals */
	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			handle_fault_error_nolock(regs, 0);
		return;
	}
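	/* Slow path: retry the fault under the mmap read lock. */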
lock_mmap:
retry:
	vma = lock_mm_and_find_vma(mm, address, regs);
	if (!vma)
		return handle_fault_error_nolock(regs, SEGV_MAPERR);
	if (unlikely(!(vma->vm_flags & access)))
		return handle_fault_error(regs, SEGV_ACCERR);
	fault = handle_mm_fault(vma, address, flags, regs);
	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			handle_fault_error_nolock(regs, 0);
		return;
	}
	/* The fault is fully completed (including releasing mmap lock) */
	if (fault & VM_FAULT_COMPLETED)
		return;
	if (fault & VM_FAULT_RETRY) {
		flags |= FAULT_FLAG_TRIED;
		goto retry;
	}
	mmap_read_unlock(mm);
done:
	if (!(fault & VM_FAULT_ERROR))
		return;
	if (fault & VM_FAULT_OOM) {
		if (!user_mode(regs))
			handle_fault_error_nolock(regs, 0);
		else
			pagefault_out_of_memory();
	} else if (fault & VM_FAULT_SIGSEGV) {
		if (!user_mode(regs))
			handle_fault_error_nolock(regs, 0);
		else
			do_sigsegv(regs, SEGV_MAPERR);
	} else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON |
			    VM_FAULT_HWPOISON_LARGE)) {
		if (!user_mode(regs))
			handle_fault_error_nolock(regs, 0);
		else
			do_sigbus(regs);
	} else {
		pr_emerg("Unexpected fault flags: %08x\n", fault);
		BUG();
	}
}

void do_protection_exception(struct pt_regs *regs)
{
	union teid teid = { .val = regs->int_parm_long };

	/*
	 * Protection exceptions are suppressing, decrement psw address.
	 * The exception to this rule are aborted transactions, for these
	 * the PSW already points to the correct location.
	 */
	if (!(regs->int_code & 0x200))
		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
	/*
	 * Check for low-address protection. This needs to be treated
	 * as a special case because the translation exception code
	 * field is not guaranteed to contain valid data in this case.
	 */
	if (unlikely(!teid.b61)) {
		if (user_mode(regs)) {
			/* Low-address protection in user mode: cannot happen */
			dump_fault_info(regs);
			die(regs, "Low-address protection");
		}
		/*
		 * Low-address protection in kernel mode means
		 * NULL pointer write access in kernel mode.
		 */
		return handle_fault_error_nolock(regs, 0);
	}
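	/*
	 * TEID bit 56 set means the fault was caused by an attempt to
	 * execute from a non-executable page (instruction-execution
	 * protection). Report it as an access error.
	 */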
	if (unlikely(cpu_has_nx() && teid.b56)) {
		regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK);
		return handle_fault_error_nolock(regs, SEGV_ACCERR);
	}
	do_exception(regs, VM_WRITE);
}
NOKPROBE_SYMBOL(do_protection_exception);

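/*
 * DAT exceptions (region, segment, or page translation) may be caused
 * by any access type, so allow read, write, and execute accesses.
 */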
void do_dat_exception(struct pt_regs *regs)
{
	do_exception(regs, VM_ACCESS_FLAGS);
}
NOKPROBE_SYMBOL(do_dat_exception);

#if IS_ENABLED(CONFIG_PGSTE)

void do_secure_storage_access(struct pt_regs *regs)
{
	union teid teid = { .val = regs->int_parm_long };
	unsigned long addr = get_fault_address(regs);
	struct vm_area_struct *vma;
	struct folio_walk fw;
	struct mm_struct *mm;
	struct folio *folio;
	int rc;

	/*
	 * Bit 61 indicates if the address is valid, if it is not the
	 * kernel should be stopped or SIGSEGV should be sent to the
	 * process. Bit 61 is not reliable without the misc UV feature,
	 * therefore this needs to be checked too.
	 */
	if (uv_has_feature(BIT_UV_FEAT_MISC) && !teid.b61) {
		/*
		 * When this happens, userspace did something that it
		 * was not supposed to do, e.g. branching into secure
		 * memory. Trigger a segmentation fault.
		 */
		if (user_mode(regs)) {
			send_sig(SIGSEGV, current, 0);
			return;
		}
		/*
		 * The kernel should never run into this case and
		 * there is no way out of this situation.
		 */
		panic("Unexpected PGM 0x3d with TEID bit 61=0");
	}
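	/*
	 * A kernel fault means the kernel touched a secure page. Make
	 * the folio accessible (non-secure) again and retry the
	 * faulting instruction.
	 */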
	if (is_kernel_fault(regs)) {
		folio = phys_to_folio(addr);
		if (unlikely(!folio_try_get(folio)))
			return;
		rc = arch_make_folio_accessible(folio);
		folio_put(folio);
		if (rc)
			BUG();
	} else {
		if (faulthandler_disabled())
			return handle_fault_error_nolock(regs, 0);
		mm = current->mm;
		mmap_read_lock(mm);
		vma = find_vma(mm, addr);
		if (!vma)
			return handle_fault_error(regs, SEGV_MAPERR);
		folio = folio_walk_start(&fw, vma, addr, 0);
		if (!folio) {
			mmap_read_unlock(mm);
			return;
		}
		/* arch_make_folio_accessible() needs a raised refcount. */
		folio_get(folio);
		rc = arch_make_folio_accessible(folio);
		folio_put(folio);
		folio_walk_end(&fw, vma);
		if (rc)
			send_sig(SIGSEGV, current, 0);
		mmap_read_unlock(mm);
	}
}
NOKPROBE_SYMBOL(do_secure_storage_access);

#endif /* CONFIG_PGSTE */