1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * shstk.c - Intel shadow stack support 4 * 5 * Copyright (c) 2021, Intel Corporation. 6 * Yu-cheng Yu <yu-cheng.yu@intel.com> 7 */ 8 9 #include <linux/sched.h> 10 #include <linux/bitops.h> 11 #include <linux/types.h> 12 #include <linux/mm.h> 13 #include <linux/mman.h> 14 #include <linux/slab.h> 15 #include <linux/uaccess.h> 16 #include <linux/sched/signal.h> 17 #include <linux/compat.h> 18 #include <linux/sizes.h> 19 #include <linux/user.h> 20 #include <linux/syscalls.h> 21 #include <asm/msr.h> 22 #include <asm/fpu/xstate.h> 23 #include <asm/fpu/types.h> 24 #include <asm/shstk.h> 25 #include <asm/special_insns.h> 26 #include <asm/fpu/api.h> 27 #include <asm/prctl.h> 28 29 #define SS_FRAME_SIZE 8 30 31 static bool features_enabled(unsigned long features) 32 { 33 return current->thread.features & features; 34 } 35 36 static void features_set(unsigned long features) 37 { 38 current->thread.features |= features; 39 } 40 41 static void features_clr(unsigned long features) 42 { 43 current->thread.features &= ~features; 44 } 45 46 /* 47 * Create a restore token on the shadow stack. A token is always 8-byte 48 * and aligned to 8. 49 */ 50 static int create_rstor_token(unsigned long ssp, unsigned long *token_addr) 51 { 52 unsigned long addr; 53 54 /* Token must be aligned */ 55 if (!IS_ALIGNED(ssp, 8)) 56 return -EINVAL; 57 58 addr = ssp - SS_FRAME_SIZE; 59 60 /* 61 * SSP is aligned, so reserved bits and mode bit are a zero, just mark 62 * the token 64-bit. 63 */ 64 ssp |= BIT(0); 65 66 if (write_user_shstk_64((u64 __user *)addr, (u64)ssp)) 67 return -EFAULT; 68 69 if (token_addr) 70 *token_addr = addr; 71 72 return 0; 73 } 74 75 /* 76 * VM_SHADOW_STACK will have a guard page. This helps userspace protect 77 * itself from attacks. The reasoning is as follows: 78 * 79 * The shadow stack pointer(SSP) is moved by CALL, RET, and INCSSPQ. The 80 * INCSSP instruction can increment the shadow stack pointer. It is the 81 * shadow stack analog of an instruction like: 82 * 83 * addq $0x80, %rsp 84 * 85 * However, there is one important difference between an ADD on %rsp 86 * and INCSSP. In addition to modifying SSP, INCSSP also reads from the 87 * memory of the first and last elements that were "popped". It can be 88 * thought of as acting like this: 89 * 90 * READ_ONCE(ssp); // read+discard top element on stack 91 * ssp += nr_to_pop * 8; // move the shadow stack 92 * READ_ONCE(ssp-8); // read+discard last popped stack element 93 * 94 * The maximum distance INCSSP can move the SSP is 2040 bytes, before 95 * it would read the memory. Therefore a single page gap will be enough 96 * to prevent any operation from shifting the SSP to an adjacent stack, 97 * since it would have to land in the gap at least once, causing a 98 * fault. 99 */ 100 static unsigned long alloc_shstk(unsigned long addr, unsigned long size, 101 unsigned long token_offset, bool set_res_tok) 102 { 103 int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G; 104 struct mm_struct *mm = current->mm; 105 unsigned long mapped_addr, unused; 106 107 if (addr) 108 flags |= MAP_FIXED_NOREPLACE; 109 110 mmap_write_lock(mm); 111 mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags, 112 VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL); 113 mmap_write_unlock(mm); 114 115 if (!set_res_tok || IS_ERR_VALUE(mapped_addr)) 116 goto out; 117 118 if (create_rstor_token(mapped_addr + token_offset, NULL)) { 119 vm_munmap(mapped_addr, size); 120 return -EINVAL; 121 } 122 123 out: 124 return mapped_addr; 125 } 126 127 static unsigned long adjust_shstk_size(unsigned long size) 128 { 129 if (size) 130 return PAGE_ALIGN(size); 131 132 return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G)); 133 } 134 135 static void unmap_shadow_stack(u64 base, u64 size) 136 { 137 int r; 138 139 r = vm_munmap(base, size); 140 141 /* 142 * mmap_write_lock_killable() failed with -EINTR. This means 143 * the process is about to die and have it's MM cleaned up. 144 * This task shouldn't ever make it back to userspace. In this 145 * case it is ok to leak a shadow stack, so just exit out. 146 */ 147 if (r == -EINTR) 148 return; 149 150 /* 151 * For all other types of vm_munmap() failure, either the 152 * system is out of memory or there is bug. 153 */ 154 WARN_ON_ONCE(r); 155 } 156 157 static int shstk_setup(void) 158 { 159 struct thread_shstk *shstk = ¤t->thread.shstk; 160 unsigned long addr, size; 161 162 /* Already enabled */ 163 if (features_enabled(ARCH_SHSTK_SHSTK)) 164 return 0; 165 166 /* Also not supported for 32 bit */ 167 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_ia32_syscall()) 168 return -EOPNOTSUPP; 169 170 size = adjust_shstk_size(0); 171 addr = alloc_shstk(0, size, 0, false); 172 if (IS_ERR_VALUE(addr)) 173 return PTR_ERR((void *)addr); 174 175 fpregs_lock_and_load(); 176 wrmsrq(MSR_IA32_PL3_SSP, addr + size); 177 wrmsrq(MSR_IA32_U_CET, CET_SHSTK_EN); 178 fpregs_unlock(); 179 180 shstk->base = addr; 181 shstk->size = size; 182 features_set(ARCH_SHSTK_SHSTK); 183 184 return 0; 185 } 186 187 void reset_thread_features(void) 188 { 189 memset(¤t->thread.shstk, 0, sizeof(struct thread_shstk)); 190 current->thread.features = 0; 191 current->thread.features_locked = 0; 192 } 193 194 unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, u64 clone_flags, 195 unsigned long stack_size) 196 { 197 struct thread_shstk *shstk = &tsk->thread.shstk; 198 unsigned long addr, size; 199 200 /* 201 * If shadow stack is not enabled on the new thread, skip any 202 * switch to a new shadow stack. 203 */ 204 if (!features_enabled(ARCH_SHSTK_SHSTK)) 205 return 0; 206 207 /* 208 * For CLONE_VFORK the child will share the parents shadow stack. 209 * Make sure to clear the internal tracking of the thread shadow 210 * stack so the freeing logic run for child knows to leave it alone. 211 */ 212 if (clone_flags & CLONE_VFORK) { 213 shstk->base = 0; 214 shstk->size = 0; 215 return 0; 216 } 217 218 /* 219 * For !CLONE_VM the child will use a copy of the parents shadow 220 * stack. 221 */ 222 if (!(clone_flags & CLONE_VM)) 223 return 0; 224 225 size = adjust_shstk_size(stack_size); 226 addr = alloc_shstk(0, size, 0, false); 227 if (IS_ERR_VALUE(addr)) 228 return addr; 229 230 shstk->base = addr; 231 shstk->size = size; 232 233 return addr + size; 234 } 235 236 static unsigned long get_user_shstk_addr(void) 237 { 238 unsigned long long ssp; 239 240 fpregs_lock_and_load(); 241 242 rdmsrq(MSR_IA32_PL3_SSP, ssp); 243 244 fpregs_unlock(); 245 246 return ssp; 247 } 248 249 int shstk_pop(u64 *val) 250 { 251 int ret = 0; 252 u64 ssp; 253 254 if (!features_enabled(ARCH_SHSTK_SHSTK)) 255 return -ENOTSUPP; 256 257 fpregs_lock_and_load(); 258 259 rdmsrq(MSR_IA32_PL3_SSP, ssp); 260 if (val && get_user(*val, (__user u64 *)ssp)) 261 ret = -EFAULT; 262 else 263 wrmsrq(MSR_IA32_PL3_SSP, ssp + SS_FRAME_SIZE); 264 fpregs_unlock(); 265 266 return ret; 267 } 268 269 int shstk_push(u64 val) 270 { 271 u64 ssp; 272 int ret; 273 274 if (!features_enabled(ARCH_SHSTK_SHSTK)) 275 return -ENOTSUPP; 276 277 fpregs_lock_and_load(); 278 279 rdmsrq(MSR_IA32_PL3_SSP, ssp); 280 ssp -= SS_FRAME_SIZE; 281 ret = write_user_shstk_64((__user void *)ssp, val); 282 if (!ret) 283 wrmsrq(MSR_IA32_PL3_SSP, ssp); 284 fpregs_unlock(); 285 286 return ret; 287 } 288 289 #define SHSTK_DATA_BIT BIT(63) 290 291 static int put_shstk_data(u64 __user *addr, u64 data) 292 { 293 if (WARN_ON_ONCE(data & SHSTK_DATA_BIT)) 294 return -EINVAL; 295 296 /* 297 * Mark the high bit so that the sigframe can't be processed as a 298 * return address. 299 */ 300 if (write_user_shstk_64(addr, data | SHSTK_DATA_BIT)) 301 return -EFAULT; 302 return 0; 303 } 304 305 static int get_shstk_data(unsigned long *data, unsigned long __user *addr) 306 { 307 unsigned long ldata; 308 309 if (unlikely(get_user(ldata, addr))) 310 return -EFAULT; 311 312 if (!(ldata & SHSTK_DATA_BIT)) 313 return -EINVAL; 314 315 *data = ldata & ~SHSTK_DATA_BIT; 316 317 return 0; 318 } 319 320 static int shstk_push_sigframe(unsigned long *ssp) 321 { 322 unsigned long target_ssp = *ssp; 323 324 /* Token must be aligned */ 325 if (!IS_ALIGNED(target_ssp, 8)) 326 return -EINVAL; 327 328 *ssp -= SS_FRAME_SIZE; 329 if (put_shstk_data((void __user *)*ssp, target_ssp)) 330 return -EFAULT; 331 332 return 0; 333 } 334 335 static int shstk_pop_sigframe(unsigned long *ssp) 336 { 337 struct vm_area_struct *vma; 338 unsigned long token_addr; 339 bool need_to_check_vma; 340 int err = 1; 341 342 /* 343 * It is possible for the SSP to be off the end of a shadow stack by 4 344 * or 8 bytes. If the shadow stack is at the start of a page or 4 bytes 345 * before it, it might be this case, so check that the address being 346 * read is actually shadow stack. 347 */ 348 if (!IS_ALIGNED(*ssp, 8)) 349 return -EINVAL; 350 351 need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp; 352 353 if (need_to_check_vma) 354 if (mmap_read_lock_killable(current->mm)) 355 return -EINTR; 356 357 err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp); 358 if (unlikely(err)) 359 goto out_err; 360 361 if (need_to_check_vma) { 362 vma = find_vma(current->mm, *ssp); 363 if (!vma || !(vma->vm_flags & VM_SHADOW_STACK)) { 364 err = -EFAULT; 365 goto out_err; 366 } 367 368 mmap_read_unlock(current->mm); 369 } 370 371 /* Restore SSP aligned? */ 372 if (unlikely(!IS_ALIGNED(token_addr, 8))) 373 return -EINVAL; 374 375 /* SSP in userspace? */ 376 if (unlikely(token_addr >= TASK_SIZE_MAX)) 377 return -EINVAL; 378 379 *ssp = token_addr; 380 381 return 0; 382 out_err: 383 if (need_to_check_vma) 384 mmap_read_unlock(current->mm); 385 return err; 386 } 387 388 int setup_signal_shadow_stack(struct ksignal *ksig) 389 { 390 void __user *restorer = ksig->ka.sa.sa_restorer; 391 unsigned long ssp; 392 int err; 393 394 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || 395 !features_enabled(ARCH_SHSTK_SHSTK)) 396 return 0; 397 398 if (!restorer) 399 return -EINVAL; 400 401 ssp = get_user_shstk_addr(); 402 if (unlikely(!ssp)) 403 return -EINVAL; 404 405 err = shstk_push_sigframe(&ssp); 406 if (unlikely(err)) 407 return err; 408 409 /* Push restorer address */ 410 ssp -= SS_FRAME_SIZE; 411 err = write_user_shstk_64((u64 __user *)ssp, (u64)restorer); 412 if (unlikely(err)) 413 return -EFAULT; 414 415 fpregs_lock_and_load(); 416 wrmsrq(MSR_IA32_PL3_SSP, ssp); 417 fpregs_unlock(); 418 419 return 0; 420 } 421 422 int restore_signal_shadow_stack(void) 423 { 424 unsigned long ssp; 425 int err; 426 427 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || 428 !features_enabled(ARCH_SHSTK_SHSTK)) 429 return 0; 430 431 ssp = get_user_shstk_addr(); 432 if (unlikely(!ssp)) 433 return -EINVAL; 434 435 err = shstk_pop_sigframe(&ssp); 436 if (unlikely(err)) 437 return err; 438 439 fpregs_lock_and_load(); 440 wrmsrq(MSR_IA32_PL3_SSP, ssp); 441 fpregs_unlock(); 442 443 return 0; 444 } 445 446 void shstk_free(struct task_struct *tsk) 447 { 448 struct thread_shstk *shstk = &tsk->thread.shstk; 449 450 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || 451 !features_enabled(ARCH_SHSTK_SHSTK)) 452 return; 453 454 /* 455 * When fork() with CLONE_VM fails, the child (tsk) already has a 456 * shadow stack allocated, and exit_thread() calls this function to 457 * free it. In this case the parent (current) and the child share 458 * the same mm struct. 459 */ 460 if (!tsk->mm || tsk->mm != current->mm) 461 return; 462 463 /* 464 * If shstk->base is NULL, then this task is not managing its 465 * own shadow stack (CLONE_VFORK). So skip freeing it. 466 */ 467 if (!shstk->base) 468 return; 469 470 /* 471 * shstk->base is NULL for CLONE_VFORK child tasks, and so is 472 * normal. But size = 0 on a shstk->base is not normal and 473 * indicated an attempt to free the thread shadow stack twice. 474 * Warn about it. 475 */ 476 if (WARN_ON(!shstk->size)) 477 return; 478 479 unmap_shadow_stack(shstk->base, shstk->size); 480 481 shstk->size = 0; 482 } 483 484 static int wrss_control(bool enable) 485 { 486 u64 msrval; 487 488 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK)) 489 return -EOPNOTSUPP; 490 491 /* 492 * Only enable WRSS if shadow stack is enabled. If shadow stack is not 493 * enabled, WRSS will already be disabled, so don't bother clearing it 494 * when disabling. 495 */ 496 if (!features_enabled(ARCH_SHSTK_SHSTK)) 497 return -EPERM; 498 499 /* Already enabled/disabled? */ 500 if (features_enabled(ARCH_SHSTK_WRSS) == enable) 501 return 0; 502 503 fpregs_lock_and_load(); 504 rdmsrq(MSR_IA32_U_CET, msrval); 505 506 if (enable) { 507 features_set(ARCH_SHSTK_WRSS); 508 msrval |= CET_WRSS_EN; 509 } else { 510 features_clr(ARCH_SHSTK_WRSS); 511 if (!(msrval & CET_WRSS_EN)) 512 goto unlock; 513 514 msrval &= ~CET_WRSS_EN; 515 } 516 517 wrmsrq(MSR_IA32_U_CET, msrval); 518 519 unlock: 520 fpregs_unlock(); 521 522 return 0; 523 } 524 525 static int shstk_disable(void) 526 { 527 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK)) 528 return -EOPNOTSUPP; 529 530 /* Already disabled? */ 531 if (!features_enabled(ARCH_SHSTK_SHSTK)) 532 return 0; 533 534 fpregs_lock_and_load(); 535 /* Disable WRSS too when disabling shadow stack */ 536 wrmsrq(MSR_IA32_U_CET, 0); 537 wrmsrq(MSR_IA32_PL3_SSP, 0); 538 fpregs_unlock(); 539 540 shstk_free(current); 541 features_clr(ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS); 542 543 return 0; 544 } 545 546 SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags) 547 { 548 bool set_tok = flags & SHADOW_STACK_SET_TOKEN; 549 unsigned long aligned_size; 550 551 if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK)) 552 return -EOPNOTSUPP; 553 554 if (flags & ~SHADOW_STACK_SET_TOKEN) 555 return -EINVAL; 556 557 /* If there isn't space for a token */ 558 if (set_tok && size < 8) 559 return -ENOSPC; 560 561 if (addr && addr < SZ_4G) 562 return -ERANGE; 563 564 /* 565 * An overflow would result in attempting to write the restore token 566 * to the wrong location. Not catastrophic, but just return the right 567 * error code and block it. 568 */ 569 aligned_size = PAGE_ALIGN(size); 570 if (aligned_size < size) 571 return -EOVERFLOW; 572 573 return alloc_shstk(addr, aligned_size, size, set_tok); 574 } 575 576 long shstk_prctl(struct task_struct *task, int option, unsigned long arg2) 577 { 578 unsigned long features = arg2; 579 580 if (option == ARCH_SHSTK_STATUS) { 581 return put_user(task->thread.features, (unsigned long __user *)arg2); 582 } 583 584 if (option == ARCH_SHSTK_LOCK) { 585 task->thread.features_locked |= features; 586 return 0; 587 } 588 589 /* Only allow via ptrace */ 590 if (task != current) { 591 if (option == ARCH_SHSTK_UNLOCK && IS_ENABLED(CONFIG_CHECKPOINT_RESTORE)) { 592 task->thread.features_locked &= ~features; 593 return 0; 594 } 595 return -EINVAL; 596 } 597 598 /* Do not allow to change locked features */ 599 if (features & task->thread.features_locked) 600 return -EPERM; 601 602 /* Only support enabling/disabling one feature at a time. */ 603 if (hweight_long(features) > 1) 604 return -EINVAL; 605 606 if (option == ARCH_SHSTK_DISABLE) { 607 if (features & ARCH_SHSTK_WRSS) 608 return wrss_control(false); 609 if (features & ARCH_SHSTK_SHSTK) 610 return shstk_disable(); 611 return -EINVAL; 612 } 613 614 /* Handle ARCH_SHSTK_ENABLE */ 615 if (features & ARCH_SHSTK_SHSTK) 616 return shstk_setup(); 617 if (features & ARCH_SHSTK_WRSS) 618 return wrss_control(true); 619 return -EINVAL; 620 } 621 622 int shstk_update_last_frame(unsigned long val) 623 { 624 unsigned long ssp; 625 626 if (!features_enabled(ARCH_SHSTK_SHSTK)) 627 return 0; 628 629 ssp = get_user_shstk_addr(); 630 return write_user_shstk_64((u64 __user *)ssp, (u64)val); 631 } 632 633 bool shstk_is_enabled(void) 634 { 635 return features_enabled(ARCH_SHSTK_SHSTK); 636 } 637