1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * S390 version 4 * Copyright IBM Corp. 1999 5 * Author(s): Hartmut Penner (hp@de.ibm.com) 6 * Ulrich Weigand (uweigand@de.ibm.com) 7 * 8 * Derived from "arch/i386/mm/fault.c" 9 * Copyright (C) 1995 Linus Torvalds 10 */ 11 12 #include <linux/kernel_stat.h> 13 #include <linux/mmu_context.h> 14 #include <linux/perf_event.h> 15 #include <linux/signal.h> 16 #include <linux/sched.h> 17 #include <linux/sched/debug.h> 18 #include <linux/jump_label.h> 19 #include <linux/kernel.h> 20 #include <linux/errno.h> 21 #include <linux/string.h> 22 #include <linux/types.h> 23 #include <linux/ptrace.h> 24 #include <linux/mman.h> 25 #include <linux/mm.h> 26 #include <linux/compat.h> 27 #include <linux/smp.h> 28 #include <linux/kdebug.h> 29 #include <linux/init.h> 30 #include <linux/console.h> 31 #include <linux/extable.h> 32 #include <linux/hardirq.h> 33 #include <linux/kprobes.h> 34 #include <linux/uaccess.h> 35 #include <linux/hugetlb.h> 36 #include <linux/kfence.h> 37 #include <linux/pagewalk.h> 38 #include <asm/asm-extable.h> 39 #include <asm/asm-offsets.h> 40 #include <asm/ptrace.h> 41 #include <asm/fault.h> 42 #include <asm/diag.h> 43 #include <asm/gmap.h> 44 #include <asm/irq.h> 45 #include <asm/facility.h> 46 #include <asm/uv.h> 47 #include "../kernel/entry.h" 48 49 static DEFINE_STATIC_KEY_FALSE(have_store_indication); 50 51 static int __init fault_init(void) 52 { 53 if (test_facility(75)) 54 static_branch_enable(&have_store_indication); 55 return 0; 56 } 57 early_initcall(fault_init); 58 59 /* 60 * Find out which address space caused the exception. 61 */ 62 static bool is_kernel_fault(struct pt_regs *regs) 63 { 64 union teid teid = { .val = regs->int_parm_long }; 65 66 if (user_mode(regs)) 67 return false; 68 if (teid.as == PSW_BITS_AS_SECONDARY) 69 return false; 70 return true; 71 } 72 73 static unsigned long get_fault_address(struct pt_regs *regs) 74 { 75 union teid teid = { .val = regs->int_parm_long }; 76 77 return teid.addr * PAGE_SIZE; 78 } 79 80 static __always_inline bool fault_is_write(struct pt_regs *regs) 81 { 82 union teid teid = { .val = regs->int_parm_long }; 83 84 if (static_branch_likely(&have_store_indication)) 85 return teid.fsi == TEID_FSI_STORE; 86 return false; 87 } 88 89 static void dump_pagetable(unsigned long asce, unsigned long address) 90 { 91 unsigned long entry, *table = __va(asce & _ASCE_ORIGIN); 92 93 pr_alert("AS:%016lx ", asce); 94 switch (asce & _ASCE_TYPE_MASK) { 95 case _ASCE_TYPE_REGION1: 96 table += (address & _REGION1_INDEX) >> _REGION1_SHIFT; 97 if (get_kernel_nofault(entry, table)) 98 goto bad; 99 pr_cont("R1:%016lx ", entry); 100 if (entry & _REGION_ENTRY_INVALID) 101 goto out; 102 table = __va(entry & _REGION_ENTRY_ORIGIN); 103 fallthrough; 104 case _ASCE_TYPE_REGION2: 105 table += (address & _REGION2_INDEX) >> _REGION2_SHIFT; 106 if (get_kernel_nofault(entry, table)) 107 goto bad; 108 pr_cont("R2:%016lx ", entry); 109 if (entry & _REGION_ENTRY_INVALID) 110 goto out; 111 table = __va(entry & _REGION_ENTRY_ORIGIN); 112 fallthrough; 113 case _ASCE_TYPE_REGION3: 114 table += (address & _REGION3_INDEX) >> _REGION3_SHIFT; 115 if (get_kernel_nofault(entry, table)) 116 goto bad; 117 pr_cont("R3:%016lx ", entry); 118 if (entry & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE)) 119 goto out; 120 table = __va(entry & _REGION_ENTRY_ORIGIN); 121 fallthrough; 122 case _ASCE_TYPE_SEGMENT: 123 table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; 124 if (get_kernel_nofault(entry, table)) 125 goto bad; 126 pr_cont("S:%016lx ", entry); 127 if (entry & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE)) 128 goto out; 129 table = __va(entry & _SEGMENT_ENTRY_ORIGIN); 130 } 131 table += (address & _PAGE_INDEX) >> PAGE_SHIFT; 132 if (get_kernel_nofault(entry, table)) 133 goto bad; 134 pr_cont("P:%016lx ", entry); 135 out: 136 pr_cont("\n"); 137 return; 138 bad: 139 pr_cont("BAD\n"); 140 } 141 142 static void dump_fault_info(struct pt_regs *regs) 143 { 144 union teid teid = { .val = regs->int_parm_long }; 145 unsigned long asce; 146 147 pr_alert("Failing address: %016lx TEID: %016lx\n", 148 get_fault_address(regs), teid.val); 149 pr_alert("Fault in "); 150 switch (teid.as) { 151 case PSW_BITS_AS_HOME: 152 pr_cont("home space "); 153 break; 154 case PSW_BITS_AS_SECONDARY: 155 pr_cont("secondary space "); 156 break; 157 case PSW_BITS_AS_ACCREG: 158 pr_cont("access register "); 159 break; 160 case PSW_BITS_AS_PRIMARY: 161 pr_cont("primary space "); 162 break; 163 } 164 pr_cont("mode while using "); 165 if (is_kernel_fault(regs)) { 166 asce = get_lowcore()->kernel_asce.val; 167 pr_cont("kernel "); 168 } else { 169 asce = get_lowcore()->user_asce.val; 170 pr_cont("user "); 171 } 172 pr_cont("ASCE.\n"); 173 dump_pagetable(asce, get_fault_address(regs)); 174 } 175 176 int show_unhandled_signals = 1; 177 178 void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault) 179 { 180 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); 181 182 if ((task_pid_nr(current) > 1) && !show_unhandled_signals) 183 return; 184 if (!unhandled_signal(current, signr)) 185 return; 186 if (!__ratelimit(&rs)) 187 return; 188 pr_alert("User process fault: interruption code %04x ilc:%d ", 189 regs->int_code & 0xffff, regs->int_code >> 17); 190 print_vma_addr(KERN_CONT "in ", regs->psw.addr); 191 pr_cont("\n"); 192 if (is_mm_fault) 193 dump_fault_info(regs); 194 show_regs(regs); 195 } 196 197 static void do_sigsegv(struct pt_regs *regs, int si_code) 198 { 199 report_user_fault(regs, SIGSEGV, 1); 200 force_sig_fault(SIGSEGV, si_code, (void __user *)get_fault_address(regs)); 201 } 202 203 static void handle_fault_error_nolock(struct pt_regs *regs, int si_code) 204 { 205 unsigned long address; 206 bool is_write; 207 208 if (user_mode(regs)) { 209 if (WARN_ON_ONCE(!si_code)) 210 si_code = SEGV_MAPERR; 211 return do_sigsegv(regs, si_code); 212 } 213 if (fixup_exception(regs)) 214 return; 215 if (is_kernel_fault(regs)) { 216 address = get_fault_address(regs); 217 is_write = fault_is_write(regs); 218 if (kfence_handle_page_fault(address, is_write, regs)) 219 return; 220 pr_alert("Unable to handle kernel pointer dereference in virtual kernel address space\n"); 221 } else { 222 pr_alert("Unable to handle kernel paging request in virtual user address space\n"); 223 } 224 dump_fault_info(regs); 225 die(regs, "Oops"); 226 } 227 228 static void handle_fault_error(struct pt_regs *regs, int si_code) 229 { 230 struct mm_struct *mm = current->mm; 231 232 mmap_read_unlock(mm); 233 handle_fault_error_nolock(regs, si_code); 234 } 235 236 static void do_sigbus(struct pt_regs *regs) 237 { 238 force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)get_fault_address(regs)); 239 } 240 241 /* 242 * This routine handles page faults. It determines the address, 243 * and the problem, and then passes it off to one of the appropriate 244 * routines. 245 * 246 * interruption code (int_code): 247 * 04 Protection -> Write-Protection (suppression) 248 * 10 Segment translation -> Not present (nullification) 249 * 11 Page translation -> Not present (nullification) 250 * 3b Region third trans. -> Not present (nullification) 251 */ 252 static void do_exception(struct pt_regs *regs, int access) 253 { 254 struct vm_area_struct *vma; 255 unsigned long address; 256 struct mm_struct *mm; 257 unsigned int flags; 258 vm_fault_t fault; 259 bool is_write; 260 261 /* 262 * The instruction that caused the program check has 263 * been nullified. Don't signal single step via SIGTRAP. 264 */ 265 clear_thread_flag(TIF_PER_TRAP); 266 if (kprobe_page_fault(regs, 14)) 267 return; 268 mm = current->mm; 269 address = get_fault_address(regs); 270 is_write = fault_is_write(regs); 271 if (is_kernel_fault(regs) || faulthandler_disabled() || !mm) 272 return handle_fault_error_nolock(regs, 0); 273 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); 274 flags = FAULT_FLAG_DEFAULT; 275 if (user_mode(regs)) 276 flags |= FAULT_FLAG_USER; 277 if (is_write) 278 access = VM_WRITE; 279 if (access == VM_WRITE) 280 flags |= FAULT_FLAG_WRITE; 281 if (!(flags & FAULT_FLAG_USER)) 282 goto lock_mmap; 283 vma = lock_vma_under_rcu(mm, address); 284 if (!vma) 285 goto lock_mmap; 286 if (!(vma->vm_flags & access)) { 287 vma_end_read(vma); 288 count_vm_vma_lock_event(VMA_LOCK_SUCCESS); 289 return handle_fault_error_nolock(regs, SEGV_ACCERR); 290 } 291 fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); 292 if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) 293 vma_end_read(vma); 294 if (!(fault & VM_FAULT_RETRY)) { 295 count_vm_vma_lock_event(VMA_LOCK_SUCCESS); 296 goto done; 297 } 298 count_vm_vma_lock_event(VMA_LOCK_RETRY); 299 if (fault & VM_FAULT_MAJOR) 300 flags |= FAULT_FLAG_TRIED; 301 /* Quick path to respond to signals */ 302 if (fault_signal_pending(fault, regs)) { 303 if (!user_mode(regs)) 304 handle_fault_error_nolock(regs, 0); 305 return; 306 } 307 lock_mmap: 308 retry: 309 vma = lock_mm_and_find_vma(mm, address, regs); 310 if (!vma) 311 return handle_fault_error_nolock(regs, SEGV_MAPERR); 312 if (unlikely(!(vma->vm_flags & access))) 313 return handle_fault_error(regs, SEGV_ACCERR); 314 fault = handle_mm_fault(vma, address, flags, regs); 315 if (fault_signal_pending(fault, regs)) { 316 if (!user_mode(regs)) 317 handle_fault_error_nolock(regs, 0); 318 return; 319 } 320 /* The fault is fully completed (including releasing mmap lock) */ 321 if (fault & VM_FAULT_COMPLETED) 322 return; 323 if (fault & VM_FAULT_RETRY) { 324 flags |= FAULT_FLAG_TRIED; 325 goto retry; 326 } 327 mmap_read_unlock(mm); 328 done: 329 if (!(fault & VM_FAULT_ERROR)) 330 return; 331 if (fault & VM_FAULT_OOM) { 332 if (!user_mode(regs)) 333 handle_fault_error_nolock(regs, 0); 334 else 335 pagefault_out_of_memory(); 336 } else if (fault & VM_FAULT_SIGSEGV) { 337 if (!user_mode(regs)) 338 handle_fault_error_nolock(regs, 0); 339 else 340 do_sigsegv(regs, SEGV_MAPERR); 341 } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)) { 342 if (!user_mode(regs)) 343 handle_fault_error_nolock(regs, 0); 344 else 345 do_sigbus(regs); 346 } else { 347 pr_emerg("Unexpected fault flags: %08x\n", fault); 348 BUG(); 349 } 350 } 351 352 void do_protection_exception(struct pt_regs *regs) 353 { 354 union teid teid = { .val = regs->int_parm_long }; 355 356 /* 357 * Protection exceptions are suppressing, decrement psw address. 358 * The exception to this rule are aborted transactions, for these 359 * the PSW already points to the correct location. 360 */ 361 if (!(regs->int_code & 0x200)) 362 regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); 363 /* 364 * Check for low-address protection. This needs to be treated 365 * as a special case because the translation exception code 366 * field is not guaranteed to contain valid data in this case. 367 */ 368 if (unlikely(!teid.b61)) { 369 if (user_mode(regs)) { 370 /* Low-address protection in user mode: cannot happen */ 371 die(regs, "Low-address protection"); 372 } 373 /* 374 * Low-address protection in kernel mode means 375 * NULL pointer write access in kernel mode. 376 */ 377 return handle_fault_error_nolock(regs, 0); 378 } 379 if (unlikely(MACHINE_HAS_NX && teid.b56)) { 380 regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK); 381 return handle_fault_error_nolock(regs, SEGV_ACCERR); 382 } 383 do_exception(regs, VM_WRITE); 384 } 385 NOKPROBE_SYMBOL(do_protection_exception); 386 387 void do_dat_exception(struct pt_regs *regs) 388 { 389 do_exception(regs, VM_ACCESS_FLAGS); 390 } 391 NOKPROBE_SYMBOL(do_dat_exception); 392 393 #if IS_ENABLED(CONFIG_PGSTE) 394 395 void do_secure_storage_access(struct pt_regs *regs) 396 { 397 union teid teid = { .val = regs->int_parm_long }; 398 unsigned long addr = get_fault_address(regs); 399 struct vm_area_struct *vma; 400 struct folio_walk fw; 401 struct mm_struct *mm; 402 struct folio *folio; 403 int rc; 404 405 /* 406 * Bit 61 indicates if the address is valid, if it is not the 407 * kernel should be stopped or SIGSEGV should be sent to the 408 * process. Bit 61 is not reliable without the misc UV feature, 409 * therefore this needs to be checked too. 410 */ 411 if (uv_has_feature(BIT_UV_FEAT_MISC) && !teid.b61) { 412 /* 413 * When this happens, userspace did something that it 414 * was not supposed to do, e.g. branching into secure 415 * memory. Trigger a segmentation fault. 416 */ 417 if (user_mode(regs)) { 418 send_sig(SIGSEGV, current, 0); 419 return; 420 } 421 /* 422 * The kernel should never run into this case and 423 * there is no way out of this situation. 424 */ 425 panic("Unexpected PGM 0x3d with TEID bit 61=0"); 426 } 427 if (is_kernel_fault(regs)) { 428 folio = phys_to_folio(addr); 429 if (unlikely(!folio_try_get(folio))) 430 return; 431 rc = arch_make_folio_accessible(folio); 432 folio_put(folio); 433 if (rc) 434 BUG(); 435 } else { 436 mm = current->mm; 437 mmap_read_lock(mm); 438 vma = find_vma(mm, addr); 439 if (!vma) 440 return handle_fault_error(regs, SEGV_MAPERR); 441 folio = folio_walk_start(&fw, vma, addr, 0); 442 if (!folio) { 443 mmap_read_unlock(mm); 444 return; 445 } 446 /* arch_make_folio_accessible() needs a raised refcount. */ 447 folio_get(folio); 448 rc = arch_make_folio_accessible(folio); 449 folio_put(folio); 450 folio_walk_end(&fw, vma); 451 if (rc) 452 send_sig(SIGSEGV, current, 0); 453 mmap_read_unlock(mm); 454 } 455 } 456 NOKPROBE_SYMBOL(do_secure_storage_access); 457 458 #endif /* CONFIG_PGSTE */ 459