/*
 * linux/kernel/seccomp.c
 *
 * Copyright 2004-2005  Andrea Arcangeli <andrea@cpushare.com>
 *
 * Copyright (C) 2012 Google, Inc.
 * Will Drewry <wad@chromium.org>
 *
 * This defines a simple but solid secure-computing facility.
 *
 * Mode 1 uses a fixed list of allowed system calls.
 * Mode 2 allows user-defined system call filters in the form
 *        of Berkeley Packet Filters/Linux Socket Filters.
 */

#include <linux/atomic.h>
#include <linux/audit.h>
#include <linux/compat.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/seccomp.h>
#include <linux/slab.h>
#include <linux/syscalls.h>

#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
#include <asm/syscall.h>
#endif

#ifdef CONFIG_SECCOMP_FILTER
#include <linux/filter.h>
#include <linux/pid.h>
#include <linux/ptrace.h>
#include <linux/security.h>
#include <linux/tracehook.h>
#include <linux/uaccess.h>

/**
 * struct seccomp_filter - container for seccomp BPF programs
 *
 * @usage: reference count to manage the object lifetime.
 *         get/put helpers should be used when accessing an instance
 *         outside of a lifetime-guarded section.  In general, this
 *         is only needed for handling filters shared across tasks.
 * @prev: points to a previously installed, or inherited, filter
 * @prog: the BPF program to evaluate
 *
 * seccomp_filter objects are organized in a tree linked via the @prev
 * pointer.  For any task, it appears to be a singly-linked list starting
 * with current->seccomp.filter, the most recently attached or inherited filter.
 * However, multiple filters may share a @prev node, by way of fork(), which
 * results in a unidirectional tree existing in memory.  This is similar to
 * how namespaces work.
 *
 * seccomp_filter objects should never be modified after being attached
 * to a task_struct (other than @usage).
 */
struct seccomp_filter {
	atomic_t usage;
	struct seccomp_filter *prev;
	struct bpf_prog *prog;
};

/* Limit any path through the tree to 256KB worth of instructions. */
#define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))

/*
 * Endianness is explicitly ignored and left for BPF program authors to manage
 * as per the specific architecture.
 */
static void populate_seccomp_data(struct seccomp_data *sd)
{
	struct task_struct *task = current;
	struct pt_regs *regs = task_pt_regs(task);
	unsigned long args[6];

	sd->nr = syscall_get_nr(task, regs);
	sd->arch = syscall_get_arch();
	syscall_get_arguments(task, regs, 0, 6, args);
	sd->args[0] = args[0];
	sd->args[1] = args[1];
	sd->args[2] = args[2];
	sd->args[3] = args[3];
	sd->args[4] = args[4];
	sd->args[5] = args[5];
	sd->instruction_pointer = KSTK_EIP(task);
}
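
/*
 * Editorial illustration (a hedged sketch, not part of this file): a classic
 * BPF filter sees the seccomp_data populated above as a flat buffer and reads
 * it with 32-bit absolute loads, e.g.:
 *
 *	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr))
 *	BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
 *		 offsetof(struct seccomp_data, args[0]))
 *
 * Each 64-bit args[] slot must be read as two 4-byte-aligned words; which
 * half holds the low bits depends on endianness, which is why the comment
 * above leaves that to the filter author.  seccomp_check_filter() below
 * rejects any load that is unaligned or past sizeof(struct seccomp_data).
 */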

/**
 * seccomp_check_filter - verify seccomp filter code
 * @filter: filter to verify
 * @flen: length of filter
 *
 * Takes a previously checked filter (by bpf_check_classic) and
 * redirects all filter code that loads struct sk_buff data
 * and related data through seccomp_bpf_load.  It also
 * enforces length and alignment checking of those loads.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
{
	int pc;

	for (pc = 0; pc < flen; pc++) {
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;
		u32 k = ftest->k;

		switch (code) {
		case BPF_LD | BPF_W | BPF_ABS:
			ftest->code = BPF_LDX | BPF_W | BPF_ABS;
			/* 32-bit aligned and not out of bounds. */
			if (k >= sizeof(struct seccomp_data) || k & 3)
				return -EINVAL;
			continue;
		case BPF_LD | BPF_W | BPF_LEN:
			ftest->code = BPF_LD | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		case BPF_LDX | BPF_W | BPF_LEN:
			ftest->code = BPF_LDX | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		/* Explicitly include allowed calls. */
		case BPF_RET | BPF_K:
		case BPF_RET | BPF_A:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
		case BPF_MISC | BPF_TAX:
		case BPF_MISC | BPF_TXA:
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
		case BPF_JMP | BPF_JA:
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			continue;
		default:
			return -EINVAL;
		}
	}
	return 0;
}

/**
 * seccomp_run_filters - evaluates all seccomp filters against @sd
 * @sd: optional seccomp data to be passed to filters
 *
 * Returns valid seccomp BPF response codes.
 */
static u32 seccomp_run_filters(const struct seccomp_data *sd)
{
	struct seccomp_data sd_local;
	u32 ret = SECCOMP_RET_ALLOW;
	/* Make sure cross-thread synced filter points somewhere sane. */
	struct seccomp_filter *f =
			lockless_dereference(current->seccomp.filter);

	/* Ensure unexpected behavior doesn't result in failing open. */
	if (unlikely(WARN_ON(f == NULL)))
		return SECCOMP_RET_KILL;

	if (!sd) {
		populate_seccomp_data(&sd_local);
		sd = &sd_local;
	}

	/*
	 * All filters in the list are evaluated and the lowest BPF return
	 * value always takes priority (ignoring the DATA).
	 */
	for (; f; f = f->prev) {
		u32 cur_ret = BPF_PROG_RUN(f->prog, sd);

		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
			ret = cur_ret;
	}
	return ret;
}
#endif /* CONFIG_SECCOMP_FILTER */
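
/*
 * Worked example for the precedence rule in seccomp_run_filters() (editorial,
 * using the action values from include/uapi/linux/seccomp.h): if the newest
 * filter returns SECCOMP_RET_ALLOW (0x7fff0000) but an inherited ancestor
 * returns SECCOMP_RET_ERRNO (0x00050000) for the same syscall, the ERRNO
 * action is reported because its action value is numerically lower.
 * SECCOMP_RET_KILL (0x00000000) therefore always takes precedence, and a
 * syscall is only allowed when every filter in the chain returns
 * SECCOMP_RET_ALLOW.
 */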

static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
{
	assert_spin_locked(&current->sighand->siglock);

	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
		return false;

	return true;
}

static inline void seccomp_assign_mode(struct task_struct *task,
				       unsigned long seccomp_mode)
{
	assert_spin_locked(&task->sighand->siglock);

	task->seccomp.mode = seccomp_mode;
	/*
	 * Make sure TIF_SECCOMP cannot be set before the mode (and
	 * filter) is set.
	 */
	smp_mb__before_atomic();
	set_tsk_thread_flag(task, TIF_SECCOMP);
}

#ifdef CONFIG_SECCOMP_FILTER
/* Returns 1 if the parent is an ancestor of the child. */
static int is_ancestor(struct seccomp_filter *parent,
		       struct seccomp_filter *child)
{
	/* NULL is the root ancestor. */
	if (parent == NULL)
		return 1;
	for (; child; child = child->prev)
		if (child == parent)
			return 1;
	return 0;
}

/**
 * seccomp_can_sync_threads: checks if all threads can be synchronized
 *
 * Expects sighand and cred_guard_mutex locks to be held.
 *
 * Returns 0 on success, -ve on error, or the pid of a thread which was
 * either not in the correct seccomp mode or did not have an ancestral
 * seccomp filter.
 */
static inline pid_t seccomp_can_sync_threads(void)
{
	struct task_struct *thread, *caller;

	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
	assert_spin_locked(&current->sighand->siglock);

	/* Validate all threads being eligible for synchronization. */
	caller = current;
	for_each_thread(caller, thread) {
		pid_t failed;

		/* Skip current, since it is initiating the sync. */
		if (thread == caller)
			continue;

		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
		    (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
		     is_ancestor(thread->seccomp.filter,
				 caller->seccomp.filter)))
			continue;

		/* Return the first thread that cannot be synchronized. */
		failed = task_pid_vnr(thread);
		/* If the pid cannot be resolved, then return -ESRCH */
		if (unlikely(WARN_ON(failed == 0)))
			failed = -ESRCH;
		return failed;
	}

	return 0;
}
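
/*
 * Worked example (editorial): if the calling thread's chain is F3 -> F2 -> F1
 * and a sibling's chain is F2 -> F1, then is_ancestor(F2, F3) succeeds and
 * the sibling is eligible; seccomp_sync_threads() below can simply repoint it
 * at F3.  A sibling whose chain diverged (say G1 alone) fails the check and
 * its pid is returned to the caller as the TSYNC conflict.
 */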

/**
 * seccomp_sync_threads: sets all threads to use current's filter
 *
 * Expects sighand and cred_guard_mutex locks to be held, and for
 * seccomp_can_sync_threads() to have returned success already
 * without dropping the locks.
 *
 */
static inline void seccomp_sync_threads(void)
{
	struct task_struct *thread, *caller;

	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
	assert_spin_locked(&current->sighand->siglock);

	/* Synchronize all threads. */
	caller = current;
	for_each_thread(caller, thread) {
		/* Skip current, since it needs no changes. */
		if (thread == caller)
			continue;

		/* Get a task reference for the new leaf node. */
		get_seccomp_filter(caller);
		/*
		 * Drop the task reference to the shared ancestor since
		 * current's path will hold a reference.  (This also
		 * allows a put before the assignment.)
		 */
		put_seccomp_filter(thread);
		smp_store_release(&thread->seccomp.filter,
				  caller->seccomp.filter);

		/*
		 * Don't let an unprivileged task work around
		 * the no_new_privs restriction by creating
		 * a thread that sets it up, enters seccomp,
		 * then dies.
		 */
		if (task_no_new_privs(caller))
			task_set_no_new_privs(thread);

		/*
		 * Opt the other thread into seccomp if needed.
		 * As threads are considered to be trust-realm
		 * equivalent (see ptrace_may_access), it is safe to
		 * allow one thread to transition the other.
		 */
		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
	}
}

/**
 * seccomp_prepare_filter: Prepares a seccomp filter for use.
 * @fprog: BPF program to install
 *
 * Returns filter on success or an ERR_PTR on failure.
 */
static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
{
	struct seccomp_filter *sfilter;
	int ret;
	const bool save_orig = IS_ENABLED(CONFIG_CHECKPOINT_RESTORE);

	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
		return ERR_PTR(-EINVAL);

	BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));

	/*
	 * Installing a seccomp filter requires that the task has
	 * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
	 * This avoids scenarios where unprivileged tasks can affect the
	 * behavior of privileged children.
	 */
	if (!task_no_new_privs(current) &&
	    security_capable_noaudit(current_cred(), current_user_ns(),
				     CAP_SYS_ADMIN) != 0)
		return ERR_PTR(-EACCES);

	/* Allocate a new seccomp_filter */
	sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
	if (!sfilter)
		return ERR_PTR(-ENOMEM);

	ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
					seccomp_check_filter, save_orig);
	if (ret < 0) {
		kfree(sfilter);
		return ERR_PTR(ret);
	}

	atomic_set(&sfilter->usage, 1);

	return sfilter;
}

/**
 * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
 * @user_filter: pointer to the user data containing a sock_fprog.
 *
 * Returns the prepared filter on success or an ERR_PTR on failure.
 */
static struct seccomp_filter *
seccomp_prepare_user_filter(const char __user *user_filter)
{
	struct sock_fprog fprog;
	struct seccomp_filter *filter = ERR_PTR(-EFAULT);

#ifdef CONFIG_COMPAT
	if (in_compat_syscall()) {
		struct compat_sock_fprog fprog32;

		if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
			goto out;
		fprog.len = fprog32.len;
		fprog.filter = compat_ptr(fprog32.filter);
	} else /* falls through to the if below. */
#endif
	if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
		goto out;
	filter = seccomp_prepare_filter(&fprog);
out:
	return filter;
}
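
/*
 * Editorial usage sketch (hedged, not part of this file): the sock_fprog
 * consumed above is typically built in userspace roughly as follows, here
 * denying getpid() with EPERM on x86-64 and allowing everything else:
 *
 *	#include <linux/audit.h>
 *	#include <linux/filter.h>
 *	#include <linux/seccomp.h>
 *	#include <stddef.h>
 *	#include <errno.h>
 *	#include <sys/prctl.h>
 *	#include <sys/syscall.h>
 *
 *	struct sock_filter insns[] = {
 *		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
 *			 offsetof(struct seccomp_data, arch)),
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
 *		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),
 *		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
 *			 offsetof(struct seccomp_data, nr)),
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getpid, 0, 1),
 *		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | EPERM),
 *		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
 *	};
 *	struct sock_fprog prog = {
 *		.len = sizeof(insns) / sizeof(insns[0]),
 *		.filter = insns,
 *	};
 *
 *	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 *	prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 *
 * The PR_SET_NO_NEW_PRIVS step satisfies the task_no_new_privs() check in
 * seccomp_prepare_filter() for callers without CAP_SYS_ADMIN.
 */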

/**
 * seccomp_attach_filter: validate and attach filter
 * @flags:  flags to change filter behavior
 * @filter: seccomp filter to add to the current process
 *
 * Caller must be holding current->sighand->siglock lock.
 *
 * Returns 0 on success, -ve on error.
 */
static long seccomp_attach_filter(unsigned int flags,
				  struct seccomp_filter *filter)
{
	unsigned long total_insns;
	struct seccomp_filter *walker;

	assert_spin_locked(&current->sighand->siglock);

	/* Validate resulting filter length. */
	total_insns = filter->prog->len;
	for (walker = current->seccomp.filter; walker; walker = walker->prev)
		total_insns += walker->prog->len + 4;  /* 4 instr penalty */
	if (total_insns > MAX_INSNS_PER_PATH)
		return -ENOMEM;

	/* If thread sync has been requested, check that it is possible. */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
		int ret;

		ret = seccomp_can_sync_threads();
		if (ret)
			return ret;
	}

	/*
	 * If there is an existing filter, make it the prev and don't drop its
	 * task reference.
	 */
	filter->prev = current->seccomp.filter;
	current->seccomp.filter = filter;

	/* Now that the new filter is in place, synchronize to all threads. */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
		seccomp_sync_threads();

	return 0;
}

/* get_seccomp_filter - increments the reference count of the filter on @tsk */
void get_seccomp_filter(struct task_struct *tsk)
{
	struct seccomp_filter *orig = tsk->seccomp.filter;

	if (!orig)
		return;
	/* Reference count is bounded by the number of total processes. */
	atomic_inc(&orig->usage);
}

static inline void seccomp_filter_free(struct seccomp_filter *filter)
{
	if (filter) {
		bpf_prog_destroy(filter->prog);
		kfree(filter);
	}
}

/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
void put_seccomp_filter(struct task_struct *tsk)
{
	struct seccomp_filter *orig = tsk->seccomp.filter;

	/* Clean up single-reference branches iteratively. */
	while (orig && atomic_dec_and_test(&orig->usage)) {
		struct seccomp_filter *freeme = orig;

		orig = orig->prev;
		seccomp_filter_free(freeme);
	}
}

static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason)
{
	memset(info, 0, sizeof(*info));
	info->si_signo = SIGSYS;
	info->si_code = SYS_SECCOMP;
	info->si_call_addr = (void __user *)KSTK_EIP(current);
	info->si_errno = reason;
	info->si_arch = syscall_get_arch();
	info->si_syscall = syscall;
}

/**
 * seccomp_send_sigsys - signals the task to allow in-process syscall emulation
 * @syscall: syscall number to send to userland
 * @reason: filter-supplied reason code to send to userland (via si_errno)
 *
 * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info.
 */
static void seccomp_send_sigsys(int syscall, int reason)
{
	struct siginfo info;

	seccomp_init_siginfo(&info, syscall, reason);
	force_sig_info(SIGSYS, &info, current);
}
#endif	/* CONFIG_SECCOMP_FILTER */
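
/*
 * Editorial counterpart to seccomp_send_sigsys() above (a hedged userspace
 * sketch, not part of this file): a filter returning SECCOMP_RET_TRAP can be
 * paired with a SIGSYS handler that reads the fields seccomp_init_siginfo()
 * fills in:
 *
 *	static void sigsys_handler(int sig, siginfo_t *info, void *ucontext)
 *	{
 *		int nr = info->si_syscall;	// trapped syscall number
 *		int reason = info->si_errno;	// filter's SECCOMP_RET_DATA
 *		// ... emulate or log the call here (async-signal-safely) ...
 *	}
 *
 *	struct sigaction sa = {
 *		.sa_sigaction = sigsys_handler,
 *		.sa_flags = SA_SIGINFO,
 *	};
 *	sigaction(SIGSYS, &sa, NULL);
 */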

/*
 * Secure computing mode 1 allows only read/write/exit/sigreturn.
 * To be fully secure this must be combined with rlimit
 * to limit the stack allocations too.
 */
static const int mode1_syscalls[] = {
	__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
	0, /* null terminated */
};

static void __secure_computing_strict(int this_syscall)
{
	const int *syscall_whitelist = mode1_syscalls;
#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		syscall_whitelist = get_compat_mode1_syscalls();
#endif
	do {
		if (*syscall_whitelist == this_syscall)
			return;
	} while (*++syscall_whitelist);

#ifdef SECCOMP_DEBUG
	dump_stack();
#endif
	audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
	do_exit(SIGKILL);
}
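
/*
 * Editorial sketch (not part of this file): userspace enters strict mode with
 *
 *	prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT);
 *
 * or equivalently syscall(__NR_seccomp, SECCOMP_SET_MODE_STRICT, 0, NULL);
 * any later syscall outside the whitelist above terminates the task with
 * SIGKILL.
 */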

#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
void secure_computing_strict(int this_syscall)
{
	int mode = current->seccomp.mode;

	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
		return;

	if (mode == SECCOMP_MODE_DISABLED)
		return;
	else if (mode == SECCOMP_MODE_STRICT)
		__secure_computing_strict(this_syscall);
	else
		BUG();
}
#else

#ifdef CONFIG_SECCOMP_FILTER
static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
			    const bool recheck_after_trace)
{
	u32 filter_ret, action;
	int data;

	/*
	 * Make sure that any changes to mode from another thread have
	 * been seen after TIF_SECCOMP was seen.
	 */
	rmb();

	filter_ret = seccomp_run_filters(sd);
	data = filter_ret & SECCOMP_RET_DATA;
	action = filter_ret & SECCOMP_RET_ACTION;

	switch (action) {
	case SECCOMP_RET_ERRNO:
		/* Set low-order bits as an errno, capped at MAX_ERRNO. */
		if (data > MAX_ERRNO)
			data = MAX_ERRNO;
		syscall_set_return_value(current, task_pt_regs(current),
					 -data, 0);
		goto skip;

	case SECCOMP_RET_TRAP:
		/* Show the handler the original registers. */
		syscall_rollback(current, task_pt_regs(current));
		/* Let the filter pass back 16 bits of data. */
		seccomp_send_sigsys(this_syscall, data);
		goto skip;

	case SECCOMP_RET_TRACE:
		/* We've been put in this state by the ptracer already. */
		if (recheck_after_trace)
			return 0;

		/* ENOSYS these calls if there is no tracer attached. */
		if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
			syscall_set_return_value(current,
						 task_pt_regs(current),
						 -ENOSYS, 0);
			goto skip;
		}

		/* Allow the BPF to provide the event message */
		ptrace_event(PTRACE_EVENT_SECCOMP, data);
		/*
		 * The delivery of a fatal signal during event
		 * notification may silently skip tracer notification,
		 * which could leave us with a potentially unmodified
		 * syscall that the tracer would have liked to have
		 * changed.  Since the process is about to die, we just
		 * force the syscall to be skipped and let the signal
		 * kill the process and correctly handle any tracer exit
		 * notifications.
		 */
		if (fatal_signal_pending(current))
			goto skip;
		/* Check if the tracer forced the syscall to be skipped. */
		this_syscall = syscall_get_nr(current, task_pt_regs(current));
		if (this_syscall < 0)
			goto skip;

		/*
		 * Recheck the syscall, since it may have changed.  This
		 * intentionally uses a NULL struct seccomp_data to force
		 * a reload of all registers.  This does not goto skip since
		 * a skip would have already been reported.
		 */
		if (__seccomp_filter(this_syscall, NULL, true))
			return -1;

		return 0;

	case SECCOMP_RET_ALLOW:
		return 0;

	case SECCOMP_RET_KILL:
	default: {
		siginfo_t info;

		audit_seccomp(this_syscall, SIGSYS, action);
		/* Dump core only if this is the last remaining thread. */
		if (get_nr_threads(current) == 1) {
			/* Show the original registers in the dump. */
			syscall_rollback(current, task_pt_regs(current));
			/* Trigger a manual coredump since do_exit skips it. */
			seccomp_init_siginfo(&info, this_syscall, data);
			do_coredump(&info);
		}
		do_exit(SIGSYS);
	}
	}

	unreachable();

skip:
	audit_seccomp(this_syscall, 0, action);
	return -1;
}
#else
static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
			    const bool recheck_after_trace)
{
	BUG();
}
#endif

int __secure_computing(const struct seccomp_data *sd)
{
	int mode = current->seccomp.mode;
	int this_syscall;

	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
		return 0;

	this_syscall = sd ? sd->nr :
		syscall_get_nr(current, task_pt_regs(current));

	switch (mode) {
	case SECCOMP_MODE_STRICT:
		__secure_computing_strict(this_syscall);  /* may call do_exit */
		return 0;
	case SECCOMP_MODE_FILTER:
		return __seccomp_filter(this_syscall, sd, false);
	default:
		BUG();
	}
}
#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
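
/*
 * Editorial sketch of the tracer side of SECCOMP_RET_TRACE above (hedged, not
 * part of this file): the tracer opts in with PTRACE_O_TRACESECCOMP and then
 * receives a stop whose event message carries the filter's SECCOMP_RET_DATA:
 *
 *	ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_TRACESECCOMP);
 *	...
 *	waitpid(pid, &status, 0);
 *	if (status >> 8 == (SIGTRAP | (PTRACE_EVENT_SECCOMP << 8))) {
 *		unsigned long msg;
 *
 *		ptrace(PTRACE_GETEVENTMSG, pid, 0, &msg);
 *		// msg holds the 16 bits of data returned by the filter
 *	}
 *
 * A tracer can force the syscall to be skipped by rewriting its number to -1
 * before resuming, which is exactly what the syscall_get_nr() recheck above
 * detects.
 */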

long prctl_get_seccomp(void)
{
	return current->seccomp.mode;
}

/**
 * seccomp_set_mode_strict: internal function for setting strict seccomp
 *
 * Once current->seccomp.mode is non-zero, it may not be changed.
 *
 * Returns 0 on success or -EINVAL on failure.
 */
static long seccomp_set_mode_strict(void)
{
	const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
	long ret = -EINVAL;

	spin_lock_irq(&current->sighand->siglock);

	if (!seccomp_may_assign_mode(seccomp_mode))
		goto out;

#ifdef TIF_NOTSC
	disable_TSC();
#endif
	seccomp_assign_mode(current, seccomp_mode);
	ret = 0;

out:
	spin_unlock_irq(&current->sighand->siglock);

	return ret;
}

#ifdef CONFIG_SECCOMP_FILTER
/**
 * seccomp_set_mode_filter: internal function for setting seccomp filter
 * @flags:  flags to change filter behavior
 * @filter: struct sock_fprog containing filter
 *
 * This function may be called repeatedly to install additional filters.
 * Every filter successfully installed will be evaluated (in reverse order)
 * for each system call the task makes.
 *
 * Once current->seccomp.mode is non-zero, it may not be changed.
 *
 * Returns 0 on success or -EINVAL on failure.
 */
static long seccomp_set_mode_filter(unsigned int flags,
				    const char __user *filter)
{
	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
	struct seccomp_filter *prepared = NULL;
	long ret = -EINVAL;

	/* Validate flags. */
	if (flags & ~SECCOMP_FILTER_FLAG_MASK)
		return -EINVAL;

	/* Prepare the new filter before holding any locks. */
	prepared = seccomp_prepare_user_filter(filter);
	if (IS_ERR(prepared))
		return PTR_ERR(prepared);

	/*
	 * Make sure we cannot change seccomp or nnp state via TSYNC
	 * while another thread is in the middle of calling exec.
	 */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
	    mutex_lock_killable(&current->signal->cred_guard_mutex))
		goto out_free;

	spin_lock_irq(&current->sighand->siglock);

	if (!seccomp_may_assign_mode(seccomp_mode))
		goto out;

	ret = seccomp_attach_filter(flags, prepared);
	if (ret)
		goto out;
	/* Do not free the successfully attached filter. */
	prepared = NULL;

	seccomp_assign_mode(current, seccomp_mode);
out:
	spin_unlock_irq(&current->sighand->siglock);
	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
		mutex_unlock(&current->signal->cred_guard_mutex);
out_free:
	seccomp_filter_free(prepared);
	return ret;
}
#else
static inline long seccomp_set_mode_filter(unsigned int flags,
					   const char __user *filter)
{
	return -EINVAL;
}
#endif

/* Common entry point for both prctl and syscall. */
static long do_seccomp(unsigned int op, unsigned int flags,
		       const char __user *uargs)
{
	switch (op) {
	case SECCOMP_SET_MODE_STRICT:
		if (flags != 0 || uargs != NULL)
			return -EINVAL;
		return seccomp_set_mode_strict();
	case SECCOMP_SET_MODE_FILTER:
		return seccomp_set_mode_filter(flags, uargs);
	default:
		return -EINVAL;
	}
}

SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
		const char __user *, uargs)
{
	return do_seccomp(op, flags, uargs);
}

/**
 * prctl_set_seccomp: configures current->seccomp.mode
 * @seccomp_mode: requested mode to use
 * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
 *
 * Returns 0 on success or -EINVAL on failure.
 */
long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
{
	unsigned int op;
	char __user *uargs;

	switch (seccomp_mode) {
	case SECCOMP_MODE_STRICT:
		op = SECCOMP_SET_MODE_STRICT;
		/*
		 * Setting strict mode through prctl always ignored filter,
		 * so make sure it is always NULL here to pass the internal
		 * check in do_seccomp().
		 */
		uargs = NULL;
		break;
	case SECCOMP_MODE_FILTER:
		op = SECCOMP_SET_MODE_FILTER;
		uargs = filter;
		break;
	default:
		return -EINVAL;
	}

	/* prctl interface doesn't have flags, so they are always zero. */
	return do_seccomp(op, 0, uargs);
}
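
/*
 * Editorial sketch (hedged, not part of this file): unlike the prctl path
 * above, the seccomp(2) entry point can pass flags, e.g. to install a filter
 * on every thread at once:
 *
 *	long ret = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
 *			   SECCOMP_FILTER_FLAG_TSYNC, &prog);
 *	if (ret > 0)
 *		fprintf(stderr, "thread %ld blocks TSYNC\n", ret);
 *
 * A positive return value is the id of a thread that could not be
 * synchronized, as reported by seccomp_can_sync_threads().
 */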

#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
			void __user *data)
{
	struct seccomp_filter *filter;
	struct sock_fprog_kern *fprog;
	long ret;
	unsigned long count = 0;

	if (!capable(CAP_SYS_ADMIN) ||
	    current->seccomp.mode != SECCOMP_MODE_DISABLED) {
		return -EACCES;
	}

	spin_lock_irq(&task->sighand->siglock);
	if (task->seccomp.mode != SECCOMP_MODE_FILTER) {
		ret = -EINVAL;
		goto out;
	}

	filter = task->seccomp.filter;
	while (filter) {
		filter = filter->prev;
		count++;
	}

	if (filter_off >= count) {
		ret = -ENOENT;
		goto out;
	}
	count -= filter_off;

	filter = task->seccomp.filter;
	while (filter && count > 1) {
		filter = filter->prev;
		count--;
	}

	if (WARN_ON(count != 1 || !filter)) {
		/* The filter tree shouldn't shrink while we're using it. */
		ret = -ENOENT;
		goto out;
	}

	fprog = filter->prog->orig_prog;
	if (!fprog) {
		/* This must be a new non-cBPF filter, since we save
		 * every cBPF filter's orig_prog above when
		 * CONFIG_CHECKPOINT_RESTORE is enabled.
		 */
		ret = -EMEDIUMTYPE;
		goto out;
	}

	ret = fprog->len;
	if (!data)
		goto out;

	get_seccomp_filter(task);
	spin_unlock_irq(&task->sighand->siglock);

	if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
		ret = -EFAULT;

	put_seccomp_filter(task);
	return ret;

out:
	spin_unlock_irq(&task->sighand->siglock);
	return ret;
}
#endif
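
/*
 * Editorial usage sketch for the checkpoint/restore helper above (hedged, not
 * part of this file): seccomp_get_filter() is reached through the
 * PTRACE_SECCOMP_GET_FILTER request by a tracer that has CAP_SYS_ADMIN and is
 * not itself running under seccomp:
 *
 *	long cnt = ptrace(PTRACE_SECCOMP_GET_FILTER, pid, 0, NULL);
 *	struct sock_filter *insns = calloc(cnt, sizeof(*insns));
 *
 *	ptrace(PTRACE_SECCOMP_GET_FILTER, pid, 0, insns);
 *
 * The addr argument (0 here) selects which filter in the task's chain to
 * dump; following the walk above, index 0 reaches the filter that was
 * installed first, and the call returns the number of classic BPF
 * instructions copied.
 */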