xref: /freebsd/sys/kern/subr_syscall.c (revision 6ad1ff09cc22ac59e15b16de65a05e57918b8589)
126ccf4f1SKonstantin Belousov /*-
226ccf4f1SKonstantin Belousov  * Copyright (C) 1994, David Greenman
326ccf4f1SKonstantin Belousov  * Copyright (c) 1990, 1993
426ccf4f1SKonstantin Belousov  *	The Regents of the University of California.  All rights reserved.
526ccf4f1SKonstantin Belousov  * Copyright (C) 2010 Konstantin Belousov <kib@freebsd.org>
626ccf4f1SKonstantin Belousov  *
726ccf4f1SKonstantin Belousov  * This code is derived from software contributed to Berkeley by
826ccf4f1SKonstantin Belousov  * the University of Utah, and William Jolitz.
926ccf4f1SKonstantin Belousov  *
1026ccf4f1SKonstantin Belousov  * Redistribution and use in source and binary forms, with or without
1126ccf4f1SKonstantin Belousov  * modification, are permitted provided that the following conditions
1226ccf4f1SKonstantin Belousov  * are met:
1326ccf4f1SKonstantin Belousov  * 1. Redistributions of source code must retain the above copyright
1426ccf4f1SKonstantin Belousov  *    notice, this list of conditions and the following disclaimer.
1526ccf4f1SKonstantin Belousov  * 2. Redistributions in binary form must reproduce the above copyright
1626ccf4f1SKonstantin Belousov  *    notice, this list of conditions and the following disclaimer in the
1726ccf4f1SKonstantin Belousov  *    documentation and/or other materials provided with the distribution.
1826ccf4f1SKonstantin Belousov  * 3. All advertising materials mentioning features or use of this software
1926ccf4f1SKonstantin Belousov  *    must display the following acknowledgement:
2026ccf4f1SKonstantin Belousov  *	This product includes software developed by the University of
2126ccf4f1SKonstantin Belousov  *	California, Berkeley and its contributors.
2226ccf4f1SKonstantin Belousov  * 4. Neither the name of the University nor the names of its contributors
2326ccf4f1SKonstantin Belousov  *    may be used to endorse or promote products derived from this software
2426ccf4f1SKonstantin Belousov  *    without specific prior written permission.
2526ccf4f1SKonstantin Belousov  *
2626ccf4f1SKonstantin Belousov  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2726ccf4f1SKonstantin Belousov  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2826ccf4f1SKonstantin Belousov  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2926ccf4f1SKonstantin Belousov  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
3026ccf4f1SKonstantin Belousov  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3126ccf4f1SKonstantin Belousov  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3226ccf4f1SKonstantin Belousov  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3326ccf4f1SKonstantin Belousov  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3426ccf4f1SKonstantin Belousov  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3526ccf4f1SKonstantin Belousov  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3626ccf4f1SKonstantin Belousov  * SUCH DAMAGE.
3726ccf4f1SKonstantin Belousov  *
3826ccf4f1SKonstantin Belousov  *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
3926ccf4f1SKonstantin Belousov  */
4026ccf4f1SKonstantin Belousov 
4126ccf4f1SKonstantin Belousov #include "opt_capsicum.h"
4226ccf4f1SKonstantin Belousov #include "opt_ktrace.h"
4326ccf4f1SKonstantin Belousov #include "opt_kdtrace.h"
4426ccf4f1SKonstantin Belousov 
4526ccf4f1SKonstantin Belousov __FBSDID("$FreeBSD$");
4626ccf4f1SKonstantin Belousov 
4726ccf4f1SKonstantin Belousov #include <sys/capability.h>
4826ccf4f1SKonstantin Belousov #include <sys/ktr.h>
4926ccf4f1SKonstantin Belousov #ifdef KTRACE
5026ccf4f1SKonstantin Belousov #include <sys/uio.h>
5126ccf4f1SKonstantin Belousov #include <sys/ktrace.h>
5226ccf4f1SKonstantin Belousov #endif
5326ccf4f1SKonstantin Belousov #include <security/audit/audit.h>
5426ccf4f1SKonstantin Belousov 
5526ccf4f1SKonstantin Belousov static inline int
5626ccf4f1SKonstantin Belousov syscallenter(struct thread *td, struct syscall_args *sa)
5726ccf4f1SKonstantin Belousov {
5826ccf4f1SKonstantin Belousov 	struct proc *p;
5926ccf4f1SKonstantin Belousov 	int error, traced;
6026ccf4f1SKonstantin Belousov 
6126ccf4f1SKonstantin Belousov 	PCPU_INC(cnt.v_syscall);
6226ccf4f1SKonstantin Belousov 	p = td->td_proc;
6326ccf4f1SKonstantin Belousov 
6426ccf4f1SKonstantin Belousov 	td->td_pticks = 0;
6526ccf4f1SKonstantin Belousov 	if (td->td_ucred != p->p_ucred)
6626ccf4f1SKonstantin Belousov 		cred_update_thread(td);
6726ccf4f1SKonstantin Belousov 	if (p->p_flag & P_TRACED) {
6826ccf4f1SKonstantin Belousov 		traced = 1;
6926ccf4f1SKonstantin Belousov 		PROC_LOCK(p);
7026ccf4f1SKonstantin Belousov 		td->td_dbgflags &= ~TDB_USERWR;
7126ccf4f1SKonstantin Belousov 		td->td_dbgflags |= TDB_SCE;
7226ccf4f1SKonstantin Belousov 		PROC_UNLOCK(p);
7326ccf4f1SKonstantin Belousov 	} else
7426ccf4f1SKonstantin Belousov 		traced = 0;
7526ccf4f1SKonstantin Belousov 	error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
7626ccf4f1SKonstantin Belousov #ifdef KTRACE
7726ccf4f1SKonstantin Belousov 	if (KTRPOINT(td, KTR_SYSCALL))
7826ccf4f1SKonstantin Belousov 		ktrsyscall(sa->code, sa->narg, sa->args);
7926ccf4f1SKonstantin Belousov #endif
8026ccf4f1SKonstantin Belousov 
8126ccf4f1SKonstantin Belousov 	CTR6(KTR_SYSC,
8226ccf4f1SKonstantin Belousov "syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)",
8326ccf4f1SKonstantin Belousov 	    td, td->td_proc->p_pid, syscallname(p, sa->code),
8426ccf4f1SKonstantin Belousov 	    sa->args[0], sa->args[1], sa->args[2]);
8526ccf4f1SKonstantin Belousov 
8626ccf4f1SKonstantin Belousov 	if (error == 0) {
8726ccf4f1SKonstantin Belousov 		STOPEVENT(p, S_SCE, sa->narg);
8826ccf4f1SKonstantin Belousov 		PTRACESTOP_SC(p, td, S_PT_SCE);
8926ccf4f1SKonstantin Belousov 		if (td->td_dbgflags & TDB_USERWR) {
9026ccf4f1SKonstantin Belousov 			/*
9126ccf4f1SKonstantin Belousov 			 * Reread syscall number and arguments if
9226ccf4f1SKonstantin Belousov 			 * debugger modified registers or memory.
9326ccf4f1SKonstantin Belousov 			 */
9426ccf4f1SKonstantin Belousov 			error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
9526ccf4f1SKonstantin Belousov #ifdef KTRACE
9626ccf4f1SKonstantin Belousov 			if (KTRPOINT(td, KTR_SYSCALL))
9726ccf4f1SKonstantin Belousov 				ktrsyscall(sa->code, sa->narg, sa->args);
9826ccf4f1SKonstantin Belousov #endif
9926ccf4f1SKonstantin Belousov 			if (error != 0)
10026ccf4f1SKonstantin Belousov 				goto retval;
10126ccf4f1SKonstantin Belousov 		}
10226ccf4f1SKonstantin Belousov 
10326ccf4f1SKonstantin Belousov #ifdef CAPABILITY_MODE
10426ccf4f1SKonstantin Belousov 		/*
10526ccf4f1SKonstantin Belousov 		 * In capability mode, we only allow access to system calls
10626ccf4f1SKonstantin Belousov 		 * flagged with SYF_CAPENABLED.
10726ccf4f1SKonstantin Belousov 		 */
10826ccf4f1SKonstantin Belousov 		if (IN_CAPABILITY_MODE(td) &&
10926ccf4f1SKonstantin Belousov 		    !(sa->callp->sy_flags & SYF_CAPENABLED)) {
11026ccf4f1SKonstantin Belousov 			error = ECAPMODE;
11126ccf4f1SKonstantin Belousov 			goto retval;
11226ccf4f1SKonstantin Belousov 		}
11326ccf4f1SKonstantin Belousov #endif
11426ccf4f1SKonstantin Belousov 
11526ccf4f1SKonstantin Belousov 		error = syscall_thread_enter(td, sa->callp);
11626ccf4f1SKonstantin Belousov 		if (error != 0)
11726ccf4f1SKonstantin Belousov 			goto retval;
11826ccf4f1SKonstantin Belousov 
11926ccf4f1SKonstantin Belousov #ifdef KDTRACE_HOOKS
12026ccf4f1SKonstantin Belousov 		/*
12126ccf4f1SKonstantin Belousov 		 * If the systrace module has registered it's probe
12226ccf4f1SKonstantin Belousov 		 * callback and if there is a probe active for the
12326ccf4f1SKonstantin Belousov 		 * syscall 'entry', process the probe.
12426ccf4f1SKonstantin Belousov 		 */
12526ccf4f1SKonstantin Belousov 		if (systrace_probe_func != NULL && sa->callp->sy_entry != 0)
12626ccf4f1SKonstantin Belousov 			(*systrace_probe_func)(sa->callp->sy_entry, sa->code,
12726ccf4f1SKonstantin Belousov 			    sa->callp, sa->args, 0);
12826ccf4f1SKonstantin Belousov #endif
12926ccf4f1SKonstantin Belousov 
13026ccf4f1SKonstantin Belousov 		AUDIT_SYSCALL_ENTER(sa->code, td);
13126ccf4f1SKonstantin Belousov 		error = (sa->callp->sy_call)(td, sa->args);
13226ccf4f1SKonstantin Belousov 		AUDIT_SYSCALL_EXIT(error, td);
13326ccf4f1SKonstantin Belousov 
13426ccf4f1SKonstantin Belousov 		/* Save the latest error return value. */
13526ccf4f1SKonstantin Belousov 		td->td_errno = error;
13626ccf4f1SKonstantin Belousov 
13726ccf4f1SKonstantin Belousov #ifdef KDTRACE_HOOKS
13826ccf4f1SKonstantin Belousov 		/*
13926ccf4f1SKonstantin Belousov 		 * If the systrace module has registered it's probe
14026ccf4f1SKonstantin Belousov 		 * callback and if there is a probe active for the
14126ccf4f1SKonstantin Belousov 		 * syscall 'return', process the probe.
14226ccf4f1SKonstantin Belousov 		 */
14326ccf4f1SKonstantin Belousov 		if (systrace_probe_func != NULL && sa->callp->sy_return != 0)
14426ccf4f1SKonstantin Belousov 			(*systrace_probe_func)(sa->callp->sy_return, sa->code,
14526ccf4f1SKonstantin Belousov 			    sa->callp, NULL, (error) ? -1 : td->td_retval[0]);
14626ccf4f1SKonstantin Belousov #endif
14726ccf4f1SKonstantin Belousov 		syscall_thread_exit(td, sa->callp);
14826ccf4f1SKonstantin Belousov 		CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx",
14926ccf4f1SKonstantin Belousov 		    p, error, td->td_retval[0], td->td_retval[1]);
15026ccf4f1SKonstantin Belousov 	}
15126ccf4f1SKonstantin Belousov  retval:
15226ccf4f1SKonstantin Belousov 	if (traced) {
15326ccf4f1SKonstantin Belousov 		PROC_LOCK(p);
15426ccf4f1SKonstantin Belousov 		td->td_dbgflags &= ~TDB_SCE;
15526ccf4f1SKonstantin Belousov 		PROC_UNLOCK(p);
15626ccf4f1SKonstantin Belousov 	}
15726ccf4f1SKonstantin Belousov 	(p->p_sysent->sv_set_syscall_retval)(td, error);
15826ccf4f1SKonstantin Belousov 	return (error);
15926ccf4f1SKonstantin Belousov }
16026ccf4f1SKonstantin Belousov 
16126ccf4f1SKonstantin Belousov static inline void
16226ccf4f1SKonstantin Belousov syscallret(struct thread *td, int error, struct syscall_args *sa __unused)
16326ccf4f1SKonstantin Belousov {
16426ccf4f1SKonstantin Belousov 	struct proc *p;
16526ccf4f1SKonstantin Belousov 	int traced;
16626ccf4f1SKonstantin Belousov 
16726ccf4f1SKonstantin Belousov 	p = td->td_proc;
16826ccf4f1SKonstantin Belousov 
16926ccf4f1SKonstantin Belousov 	/*
17026ccf4f1SKonstantin Belousov 	 * Check for misbehavior.
17126ccf4f1SKonstantin Belousov 	 */
17226ccf4f1SKonstantin Belousov 	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
17326ccf4f1SKonstantin Belousov 	    syscallname(p, sa->code));
17426ccf4f1SKonstantin Belousov 	KASSERT(td->td_critnest == 0,
17526ccf4f1SKonstantin Belousov 	    ("System call %s returning in a critical section",
17626ccf4f1SKonstantin Belousov 	    syscallname(p, sa->code)));
17726ccf4f1SKonstantin Belousov 	KASSERT(td->td_locks == 0,
17826ccf4f1SKonstantin Belousov 	    ("System call %s returning with %d locks held",
17926ccf4f1SKonstantin Belousov 	     syscallname(p, sa->code), td->td_locks));
18026ccf4f1SKonstantin Belousov 
18126ccf4f1SKonstantin Belousov 	/*
18226ccf4f1SKonstantin Belousov 	 * Handle reschedule and other end-of-syscall issues
18326ccf4f1SKonstantin Belousov 	 */
18426ccf4f1SKonstantin Belousov 	userret(td, td->td_frame);
18526ccf4f1SKonstantin Belousov 
18626ccf4f1SKonstantin Belousov 	CTR4(KTR_SYSC, "syscall %s exit thread %p pid %d proc %s",
18726ccf4f1SKonstantin Belousov 	    syscallname(p, sa->code), td, td->td_proc->p_pid, td->td_name);
18826ccf4f1SKonstantin Belousov 
18926ccf4f1SKonstantin Belousov #ifdef KTRACE
19026ccf4f1SKonstantin Belousov 	if (KTRPOINT(td, KTR_SYSRET))
19126ccf4f1SKonstantin Belousov 		ktrsysret(sa->code, error, td->td_retval[0]);
19226ccf4f1SKonstantin Belousov #endif
19326ccf4f1SKonstantin Belousov 
19426ccf4f1SKonstantin Belousov 	if (p->p_flag & P_TRACED) {
19526ccf4f1SKonstantin Belousov 		traced = 1;
19626ccf4f1SKonstantin Belousov 		PROC_LOCK(p);
19726ccf4f1SKonstantin Belousov 		td->td_dbgflags |= TDB_SCX;
19826ccf4f1SKonstantin Belousov 		PROC_UNLOCK(p);
19926ccf4f1SKonstantin Belousov 	} else
20026ccf4f1SKonstantin Belousov 		traced = 0;
20126ccf4f1SKonstantin Belousov 	/*
20226ccf4f1SKonstantin Belousov 	 * This works because errno is findable through the
20326ccf4f1SKonstantin Belousov 	 * register set.  If we ever support an emulation where this
20426ccf4f1SKonstantin Belousov 	 * is not the case, this code will need to be revisited.
20526ccf4f1SKonstantin Belousov 	 */
20626ccf4f1SKonstantin Belousov 	STOPEVENT(p, S_SCX, sa->code);
20726ccf4f1SKonstantin Belousov 	if (traced || (td->td_dbgflags & (TDB_EXEC | TDB_FORK)) != 0) {
20826ccf4f1SKonstantin Belousov 		PROC_LOCK(p);
209ce8bd78bSKonstantin Belousov 		/*
210ce8bd78bSKonstantin Belousov 		 * If tracing the execed process, trap to the debugger
211ce8bd78bSKonstantin Belousov 		 * so that breakpoints can be set before the program
212ce8bd78bSKonstantin Belousov 		 * executes.  If debugger requested tracing of syscall
213ce8bd78bSKonstantin Belousov 		 * returns, do it now too.
214ce8bd78bSKonstantin Belousov 		 */
215*6ad1ff09SKonstantin Belousov 		if (traced &&
216*6ad1ff09SKonstantin Belousov 		    ((td->td_dbgflags & (TDB_FORK | TDB_EXEC)) != 0 ||
217ce8bd78bSKonstantin Belousov 		    (p->p_stops & S_PT_SCX) != 0))
218ce8bd78bSKonstantin Belousov 			ptracestop(td, SIGTRAP);
21926ccf4f1SKonstantin Belousov 		td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC | TDB_FORK);
22026ccf4f1SKonstantin Belousov 		PROC_UNLOCK(p);
22126ccf4f1SKonstantin Belousov 	}
22226ccf4f1SKonstantin Belousov }
223