126ccf4f1SKonstantin Belousov /*- 2df57947fSPedro F. Giffuni * SPDX-License-Identifier: BSD-4-Clause 3df57947fSPedro F. Giffuni * 426ccf4f1SKonstantin Belousov * Copyright (C) 1994, David Greenman 526ccf4f1SKonstantin Belousov * Copyright (c) 1990, 1993 626ccf4f1SKonstantin Belousov * The Regents of the University of California. All rights reserved. 726ccf4f1SKonstantin Belousov * Copyright (C) 2010 Konstantin Belousov <kib@freebsd.org> 826ccf4f1SKonstantin Belousov * 926ccf4f1SKonstantin Belousov * This code is derived from software contributed to Berkeley by 1026ccf4f1SKonstantin Belousov * the University of Utah, and William Jolitz. 1126ccf4f1SKonstantin Belousov * 1226ccf4f1SKonstantin Belousov * Redistribution and use in source and binary forms, with or without 1326ccf4f1SKonstantin Belousov * modification, are permitted provided that the following conditions 1426ccf4f1SKonstantin Belousov * are met: 1526ccf4f1SKonstantin Belousov * 1. Redistributions of source code must retain the above copyright 1626ccf4f1SKonstantin Belousov * notice, this list of conditions and the following disclaimer. 1726ccf4f1SKonstantin Belousov * 2. Redistributions in binary form must reproduce the above copyright 1826ccf4f1SKonstantin Belousov * notice, this list of conditions and the following disclaimer in the 1926ccf4f1SKonstantin Belousov * documentation and/or other materials provided with the distribution. 2026ccf4f1SKonstantin Belousov * 3. All advertising materials mentioning features or use of this software 2126ccf4f1SKonstantin Belousov * must display the following acknowledgement: 2226ccf4f1SKonstantin Belousov * This product includes software developed by the University of 2326ccf4f1SKonstantin Belousov * California, Berkeley and its contributors. 2426ccf4f1SKonstantin Belousov * 4. Neither the name of the University nor the names of its contributors 2526ccf4f1SKonstantin Belousov * may be used to endorse or promote products derived from this software 2626ccf4f1SKonstantin Belousov * without specific prior written permission. 2726ccf4f1SKonstantin Belousov * 2826ccf4f1SKonstantin Belousov * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2926ccf4f1SKonstantin Belousov * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 3026ccf4f1SKonstantin Belousov * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 3126ccf4f1SKonstantin Belousov * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 3226ccf4f1SKonstantin Belousov * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3326ccf4f1SKonstantin Belousov * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3426ccf4f1SKonstantin Belousov * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3526ccf4f1SKonstantin Belousov * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3626ccf4f1SKonstantin Belousov * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3726ccf4f1SKonstantin Belousov * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3826ccf4f1SKonstantin Belousov * SUCH DAMAGE. 3926ccf4f1SKonstantin Belousov * 4026ccf4f1SKonstantin Belousov * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 4126ccf4f1SKonstantin Belousov */ 4226ccf4f1SKonstantin Belousov 4326ccf4f1SKonstantin Belousov #include "opt_capsicum.h" 4426ccf4f1SKonstantin Belousov #include "opt_ktrace.h" 4526ccf4f1SKonstantin Belousov 4626ccf4f1SKonstantin Belousov __FBSDID("$FreeBSD$"); 4726ccf4f1SKonstantin Belousov 484a144410SRobert Watson #include <sys/capsicum.h> 4926ccf4f1SKonstantin Belousov #include <sys/ktr.h> 50fef09913SGleb Smirnoff #include <sys/vmmeter.h> 5126ccf4f1SKonstantin Belousov #ifdef KTRACE 5226ccf4f1SKonstantin Belousov #include <sys/uio.h> 5326ccf4f1SKonstantin Belousov #include <sys/ktrace.h> 5426ccf4f1SKonstantin Belousov #endif 5526ccf4f1SKonstantin Belousov #include <security/audit/audit.h> 5626ccf4f1SKonstantin Belousov 57c18ca749SJohn Baldwin static inline void 582d88da2fSKonstantin Belousov syscallenter(struct thread *td) 5926ccf4f1SKonstantin Belousov { 6026ccf4f1SKonstantin Belousov struct proc *p; 612d88da2fSKonstantin Belousov struct syscall_args *sa; 62bdc0cb4eSEdward Tomasz Napierala struct sysent *se; 6326ccf4f1SKonstantin Belousov int error, traced; 64a1bd83feSEdward Tomasz Napierala bool sy_thr_static; 6526ccf4f1SKonstantin Belousov 6683c9dea1SGleb Smirnoff VM_CNT_INC(v_syscall); 6726ccf4f1SKonstantin Belousov p = td->td_proc; 682d88da2fSKonstantin Belousov sa = &td->td_sa; 6926ccf4f1SKonstantin Belousov 7026ccf4f1SKonstantin Belousov td->td_pticks = 0; 71*b53133a7SMateusz Guzik if (__predict_false(td->td_cowgen != atomic_load_int(&p->p_cowgen))) 724ea6a9a2SMateusz Guzik thread_cow_update(td); 73bdd64116SJohn Baldwin traced = (p->p_flag & P_TRACED) != 0; 740e84a878SMateusz Guzik if (__predict_false(traced || td->td_dbgflags & TDB_USERWR)) { 7526ccf4f1SKonstantin Belousov PROC_LOCK(p); 7626ccf4f1SKonstantin Belousov td->td_dbgflags &= ~TDB_USERWR; 77bdd64116SJohn Baldwin if (traced) 7826ccf4f1SKonstantin Belousov td->td_dbgflags |= TDB_SCE; 7926ccf4f1SKonstantin Belousov PROC_UNLOCK(p); 80bdd64116SJohn Baldwin } 812d88da2fSKonstantin Belousov error = (p->p_sysent->sv_fetch_syscall_args)(td); 82bdc0cb4eSEdward Tomasz Napierala se = sa->callp; 8326ccf4f1SKonstantin Belousov #ifdef KTRACE 8426ccf4f1SKonstantin Belousov if (KTRPOINT(td, KTR_SYSCALL)) 85bdc0cb4eSEdward Tomasz Napierala ktrsyscall(sa->code, se->sy_narg, sa->args); 8626ccf4f1SKonstantin Belousov #endif 874c44811cSJeff Roberson KTR_START4(KTR_SYSC, "syscall", syscallname(p, sa->code), 887fc3ae51SOleksandr Tymoshenko (uintptr_t)td, "pid:%d", td->td_proc->p_pid, "arg0:%p", sa->args[0], 894c44811cSJeff Roberson "arg1:%p", sa->args[1], "arg2:%p", sa->args[2]); 9026ccf4f1SKonstantin Belousov 910e84a878SMateusz Guzik if (__predict_false(error != 0)) { 921af9474bSJohn Baldwin td->td_errno = error; 93c26541e3SJohn Baldwin goto retval; 941af9474bSJohn Baldwin } 954c44811cSJeff Roberson 9634098649SEdward Tomasz Napierala if (__predict_false(traced)) { 97343b391fSKonstantin Belousov PROC_LOCK(p); 988d570f64SJohn Baldwin if (p->p_ptevents & PTRACE_SCE) 9982a4538fSEric Badger ptracestop((td), SIGTRAP, NULL); 100343b391fSKonstantin Belousov PROC_UNLOCK(p); 101da45ea6bSEdward Tomasz Napierala 102da45ea6bSEdward Tomasz Napierala if ((td->td_dbgflags & TDB_USERWR) != 0) { 10326ccf4f1SKonstantin Belousov /* 104c26541e3SJohn Baldwin * Reread syscall number and arguments if debugger 105c26541e3SJohn Baldwin * modified registers or memory. 10626ccf4f1SKonstantin Belousov */ 1072d88da2fSKonstantin Belousov error = (p->p_sysent->sv_fetch_syscall_args)(td); 108bdc0cb4eSEdward Tomasz Napierala se = sa->callp; 10926ccf4f1SKonstantin Belousov #ifdef KTRACE 11026ccf4f1SKonstantin Belousov if (KTRPOINT(td, KTR_SYSCALL)) 111bdc0cb4eSEdward Tomasz Napierala ktrsyscall(sa->code, se->sy_narg, sa->args); 11226ccf4f1SKonstantin Belousov #endif 1131af9474bSJohn Baldwin if (error != 0) { 1141af9474bSJohn Baldwin td->td_errno = error; 11526ccf4f1SKonstantin Belousov goto retval; 11626ccf4f1SKonstantin Belousov } 1171af9474bSJohn Baldwin } 118da45ea6bSEdward Tomasz Napierala } 11926ccf4f1SKonstantin Belousov 12026ccf4f1SKonstantin Belousov #ifdef CAPABILITY_MODE 12126ccf4f1SKonstantin Belousov /* 12226ccf4f1SKonstantin Belousov * In capability mode, we only allow access to system calls 12326ccf4f1SKonstantin Belousov * flagged with SYF_CAPENABLED. 12426ccf4f1SKonstantin Belousov */ 1250e84a878SMateusz Guzik if (__predict_false(IN_CAPABILITY_MODE(td) && 126bdc0cb4eSEdward Tomasz Napierala (se->sy_flags & SYF_CAPENABLED) == 0)) { 1271af9474bSJohn Baldwin td->td_errno = error = ECAPMODE; 12826ccf4f1SKonstantin Belousov goto retval; 12926ccf4f1SKonstantin Belousov } 13026ccf4f1SKonstantin Belousov #endif 13126ccf4f1SKonstantin Belousov 132146fc63fSKonstantin Belousov /* 133a113b17fSKonstantin Belousov * Fetch fast sigblock value at the time of syscall entry to 134a113b17fSKonstantin Belousov * handle sleepqueue primitives which might call cursig(). 135146fc63fSKonstantin Belousov */ 136a113b17fSKonstantin Belousov if (__predict_false(sigfastblock_fetch_always)) 137a113b17fSKonstantin Belousov (void)sigfastblock_fetch(td); 138146fc63fSKonstantin Belousov 1392f729243SMateusz Guzik /* Let system calls set td_errno directly. */ 1404c6f466cSEdward Tomasz Napierala KASSERT((td->td_pflags & TDP_NERRNO) == 0, 1414c6f466cSEdward Tomasz Napierala ("%s: TDP_NERRNO set", __func__)); 14226ccf4f1SKonstantin Belousov 143a1bd83feSEdward Tomasz Napierala sy_thr_static = (se->sy_thrcnt & SY_THR_STATIC) != 0; 144a1bd83feSEdward Tomasz Napierala 14546994ec2SMark Johnston if (__predict_false(SYSTRACE_ENABLED() || 146a1bd83feSEdward Tomasz Napierala AUDIT_SYSCALL_ENTER(sa->code, td) || 147a1bd83feSEdward Tomasz Napierala !sy_thr_static)) { 148a1bd83feSEdward Tomasz Napierala if (!sy_thr_static) { 149a1bd83feSEdward Tomasz Napierala error = syscall_thread_enter(td, se); 150a1bd83feSEdward Tomasz Napierala if (error != 0) { 151a1bd83feSEdward Tomasz Napierala td->td_errno = error; 152a1bd83feSEdward Tomasz Napierala goto retval; 153a1bd83feSEdward Tomasz Napierala } 154a1bd83feSEdward Tomasz Napierala } 155a1bd83feSEdward Tomasz Napierala 1562f729243SMateusz Guzik #ifdef KDTRACE_HOOKS 1572f729243SMateusz Guzik /* Give the syscall:::entry DTrace probe a chance to fire. */ 158bdc0cb4eSEdward Tomasz Napierala if (__predict_false(se->sy_entry != 0)) 1592f729243SMateusz Guzik (*systrace_probe_func)(sa, SYSTRACE_ENTRY, 0); 1602f729243SMateusz Guzik #endif 161bdc0cb4eSEdward Tomasz Napierala error = (se->sy_call)(td, sa->args); 16226ccf4f1SKonstantin Belousov /* Save the latest error return value. */ 1634c6f466cSEdward Tomasz Napierala if (__predict_false((td->td_pflags & TDP_NERRNO) != 0)) 1644c6f466cSEdward Tomasz Napierala td->td_pflags &= ~TDP_NERRNO; 1654c6f466cSEdward Tomasz Napierala else 16626ccf4f1SKonstantin Belousov td->td_errno = error; 167275c821dSKyle Evans 168275c821dSKyle Evans /* 169275c821dSKyle Evans * Note that some syscall implementations (e.g., sys_execve) 170275c821dSKyle Evans * will commit the audit record just before their final return. 171275c821dSKyle Evans * These were done under the assumption that nothing of interest 172275c821dSKyle Evans * would happen between their return and here, where we would 173275c821dSKyle Evans * normally commit the audit record. These assumptions will 174275c821dSKyle Evans * need to be revisited should any substantial logic be added 175275c821dSKyle Evans * above. 176275c821dSKyle Evans */ 1772f729243SMateusz Guzik AUDIT_SYSCALL_EXIT(error, td); 178275c821dSKyle Evans 17926ccf4f1SKonstantin Belousov #ifdef KDTRACE_HOOKS 1808ff6d9ddSMark Johnston /* Give the syscall:::return DTrace probe a chance to fire. */ 181bdc0cb4eSEdward Tomasz Napierala if (__predict_false(se->sy_return != 0)) 1828ff6d9ddSMark Johnston (*systrace_probe_func)(sa, SYSTRACE_RETURN, 1838ff6d9ddSMark Johnston error ? -1 : td->td_retval[0]); 18426ccf4f1SKonstantin Belousov #endif 185a1bd83feSEdward Tomasz Napierala 186a1bd83feSEdward Tomasz Napierala if (!sy_thr_static) 187a1bd83feSEdward Tomasz Napierala syscall_thread_exit(td, se); 1882f729243SMateusz Guzik } else { 189bdc0cb4eSEdward Tomasz Napierala error = (se->sy_call)(td, sa->args); 1902f729243SMateusz Guzik /* Save the latest error return value. */ 1914c6f466cSEdward Tomasz Napierala if (__predict_false((td->td_pflags & TDP_NERRNO) != 0)) 1924c6f466cSEdward Tomasz Napierala td->td_pflags &= ~TDP_NERRNO; 1934c6f466cSEdward Tomasz Napierala else 1942f729243SMateusz Guzik td->td_errno = error; 1952f729243SMateusz Guzik } 196c26541e3SJohn Baldwin 19726ccf4f1SKonstantin Belousov retval: 1984c44811cSJeff Roberson KTR_STOP4(KTR_SYSC, "syscall", syscallname(p, sa->code), 1997fc3ae51SOleksandr Tymoshenko (uintptr_t)td, "pid:%d", td->td_proc->p_pid, "error:%d", error, 2004c44811cSJeff Roberson "retval0:%#lx", td->td_retval[0], "retval1:%#lx", 2014c44811cSJeff Roberson td->td_retval[1]); 2020e84a878SMateusz Guzik if (__predict_false(traced)) { 20326ccf4f1SKonstantin Belousov PROC_LOCK(p); 20426ccf4f1SKonstantin Belousov td->td_dbgflags &= ~TDB_SCE; 20526ccf4f1SKonstantin Belousov PROC_UNLOCK(p); 20626ccf4f1SKonstantin Belousov } 20726ccf4f1SKonstantin Belousov (p->p_sysent->sv_set_syscall_retval)(td, error); 20826ccf4f1SKonstantin Belousov } 20926ccf4f1SKonstantin Belousov 21026ccf4f1SKonstantin Belousov static inline void 211c18ca749SJohn Baldwin syscallret(struct thread *td) 21226ccf4f1SKonstantin Belousov { 2137d065d87SMateusz Guzik struct proc *p; 2142d88da2fSKonstantin Belousov struct syscall_args *sa; 215643f6f47SKonstantin Belousov ksiginfo_t ksi; 2161af9474bSJohn Baldwin int traced; 21726ccf4f1SKonstantin Belousov 218441eb16aSKonstantin Belousov KASSERT(td->td_errno != ERELOOKUP, 219441eb16aSKonstantin Belousov ("ERELOOKUP not consumed syscall %d", td->td_sa.code)); 220aff57357SEd Schouten 22126ccf4f1SKonstantin Belousov p = td->td_proc; 2222d88da2fSKonstantin Belousov sa = &td->td_sa; 2230e84a878SMateusz Guzik if (__predict_false(td->td_errno == ENOTCAPABLE || 2240e84a878SMateusz Guzik td->td_errno == ECAPMODE)) { 2250e84a878SMateusz Guzik if ((trap_enotcap || 2260e84a878SMateusz Guzik (p->p_flag2 & P2_TRAPCAP) != 0) && IN_CAPABILITY_MODE(td)) { 227643f6f47SKonstantin Belousov ksiginfo_init_trap(&ksi); 228643f6f47SKonstantin Belousov ksi.ksi_signo = SIGTRAP; 2291af9474bSJohn Baldwin ksi.ksi_errno = td->td_errno; 230643f6f47SKonstantin Belousov ksi.ksi_code = TRAP_CAP; 231cf98bc28SDavid Chisnall ksi.ksi_info.si_syscall = sa->original_code; 232643f6f47SKonstantin Belousov trapsignal(td, &ksi); 233643f6f47SKonstantin Belousov } 234643f6f47SKonstantin Belousov } 23526ccf4f1SKonstantin Belousov 23626ccf4f1SKonstantin Belousov /* 23726ccf4f1SKonstantin Belousov * Handle reschedule and other end-of-syscall issues 23826ccf4f1SKonstantin Belousov */ 23926ccf4f1SKonstantin Belousov userret(td, td->td_frame); 24026ccf4f1SKonstantin Belousov 24126ccf4f1SKonstantin Belousov #ifdef KTRACE 2422dd9ea6fSKonstantin Belousov if (KTRPOINT(td, KTR_SYSRET)) { 2431af9474bSJohn Baldwin ktrsysret(sa->code, td->td_errno, td->td_retval[0]); 2442dd9ea6fSKonstantin Belousov } 24526ccf4f1SKonstantin Belousov #endif 24626ccf4f1SKonstantin Belousov 2470e84a878SMateusz Guzik traced = 0; 2480e84a878SMateusz Guzik if (__predict_false(p->p_flag & P_TRACED)) { 24926ccf4f1SKonstantin Belousov traced = 1; 25026ccf4f1SKonstantin Belousov PROC_LOCK(p); 25126ccf4f1SKonstantin Belousov td->td_dbgflags |= TDB_SCX; 25226ccf4f1SKonstantin Belousov PROC_UNLOCK(p); 2530e84a878SMateusz Guzik } 2540e84a878SMateusz Guzik if (__predict_false(traced || 2550e84a878SMateusz Guzik (td->td_dbgflags & (TDB_EXEC | TDB_FORK)) != 0)) { 25626ccf4f1SKonstantin Belousov PROC_LOCK(p); 257ce8bd78bSKonstantin Belousov /* 2586e66030cSEdward Tomasz Napierala * Linux debuggers expect an additional stop for exec, 2596e66030cSEdward Tomasz Napierala * between the usual syscall entry and exit. Raise 2606e66030cSEdward Tomasz Napierala * the exec event now and then clear TDB_EXEC so that 2616e66030cSEdward Tomasz Napierala * the next stop is reported as a syscall exit by 2626e66030cSEdward Tomasz Napierala * linux_ptrace_status(). 2638bbc0600SEdward Tomasz Napierala * 2648bbc0600SEdward Tomasz Napierala * We are accessing p->p_pptr without any additional 2658bbc0600SEdward Tomasz Napierala * locks here: it cannot change while p is kept locked; 2668bbc0600SEdward Tomasz Napierala * while the debugger could in theory change its ABI 2678bbc0600SEdward Tomasz Napierala * while tracing another process, the outcome of such 2688bbc0600SEdward Tomasz Napierala * a race wouln't be deterministic anyway. 2696e66030cSEdward Tomasz Napierala */ 2708bbc0600SEdward Tomasz Napierala if (traced && (td->td_dbgflags & TDB_EXEC) != 0 && 2718bbc0600SEdward Tomasz Napierala SV_PROC_ABI(p->p_pptr) == SV_ABI_LINUX) { 2726e66030cSEdward Tomasz Napierala ptracestop(td, SIGTRAP, NULL); 2736e66030cSEdward Tomasz Napierala td->td_dbgflags &= ~TDB_EXEC; 2746e66030cSEdward Tomasz Napierala } 2756e66030cSEdward Tomasz Napierala /* 276ce8bd78bSKonstantin Belousov * If tracing the execed process, trap to the debugger 277ce8bd78bSKonstantin Belousov * so that breakpoints can be set before the program 278ce8bd78bSKonstantin Belousov * executes. If debugger requested tracing of syscall 279ce8bd78bSKonstantin Belousov * returns, do it now too. 280ce8bd78bSKonstantin Belousov */ 2816ad1ff09SKonstantin Belousov if (traced && 2826ad1ff09SKonstantin Belousov ((td->td_dbgflags & (TDB_FORK | TDB_EXEC)) != 0 || 2838d570f64SJohn Baldwin (p->p_ptevents & PTRACE_SCX) != 0)) 28482a4538fSEric Badger ptracestop(td, SIGTRAP, NULL); 28526ccf4f1SKonstantin Belousov td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC | TDB_FORK); 28626ccf4f1SKonstantin Belousov PROC_UNLOCK(p); 28726ccf4f1SKonstantin Belousov } 28826ccf4f1SKonstantin Belousov } 289