xref: /freebsd/sys/kern/subr_trap.c (revision aa64588d28258aef88cc33b8043112e8856948d0)
1 /*-
2  * Copyright (C) 1994, David Greenman
3  * Copyright (c) 1990, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 2007 The FreeBSD Foundation
6  *
7  * This code is derived from software contributed to Berkeley by
8  * the University of Utah, and William Jolitz.
9  *
10  * Portions of this software were developed by A. Joseph Koshy under
11  * sponsorship from the FreeBSD Foundation and Google, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
42  */
43 
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD$");
46 
47 #include "opt_ktrace.h"
48 #ifdef __i386__
49 #include "opt_npx.h"
50 #endif
51 #include "opt_sched.h"
52 
53 #include <sys/param.h>
54 #include <sys/bus.h>
55 #include <sys/kernel.h>
56 #include <sys/lock.h>
57 #include <sys/mutex.h>
58 #include <sys/pmckern.h>
59 #include <sys/proc.h>
60 #include <sys/ktr.h>
61 #include <sys/pioctl.h>
62 #include <sys/ptrace.h>
63 #include <sys/resourcevar.h>
64 #include <sys/sched.h>
65 #include <sys/signalvar.h>
66 #include <sys/syscall.h>
67 #include <sys/sysent.h>
68 #include <sys/systm.h>
69 #include <sys/vmmeter.h>
70 #ifdef KTRACE
71 #include <sys/uio.h>
72 #include <sys/ktrace.h>
73 #endif
74 #include <security/audit/audit.h>
75 
76 #include <machine/cpu.h>
77 #include <machine/pcb.h>
78 
79 #ifdef XEN
80 #include <vm/vm.h>
81 #include <vm/vm_param.h>
82 #include <vm/pmap.h>
83 #endif
84 
85 #include <security/mac/mac_framework.h>
86 
87 /*
88  * Define the code needed before returning to user mode, for trap and
89  * syscall.
90  */
91 void
92 userret(struct thread *td, struct trapframe *frame)
93 {
94 	struct proc *p = td->td_proc;
95 
96 	CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid,
97             td->td_name);
98 #if 0
99 #ifdef DIAGNOSTIC
100 	/* Check that we called signotify() enough. */
101 	PROC_LOCK(p);
102 	thread_lock(td);
103 	if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 ||
104 	    (td->td_flags & TDF_ASTPENDING) == 0))
105 		printf("failed to set signal flags properly for ast()\n");
106 	thread_unlock(td);
107 	PROC_UNLOCK(p);
108 #endif
109 #endif
110 #ifdef KTRACE
111 	KTRUSERRET(td);
112 #endif
113 	/*
114 	 * If this thread tickled GEOM, we need to wait for the giggling to
115 	 * stop before we return to userland
116 	 */
117 	if (td->td_pflags & TDP_GEOM)
118 		g_waitidle();
119 
120 	/*
121 	 * Charge system time if profiling.
122 	 */
123 	if (p->p_flag & P_PROFIL) {
124 		addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio);
125 	}
126 	/*
127 	 * Let the scheduler adjust our priority etc.
128 	 */
129 	sched_userret(td);
130 	KASSERT(td->td_locks == 0,
131 	    ("userret: Returning with %d locks held.", td->td_locks));
132 #ifdef XEN
133 	PT_UPDATES_FLUSH();
134 #endif
135 }
136 
/*
 * Process an asynchronous software trap.
 * This is relatively easy.
 * This function will return with preemption disabled.
 *
 * framep is the trap frame of the interrupted user context; it is
 * recorded in td_frame and later handed to userret().  The function
 * snapshots and clears the thread's pending-AST flags, services each
 * request found in the snapshot (timer signals, MAC hook, reschedule,
 * signal delivery, suspension check), restores a saved signal mask if
 * one is pending, and finishes with the common userret() epilogue.
 */
void
ast(struct trapframe *framep)
{
	struct thread *td;
	struct proc *p;
	int flags;	/* snapshot of td_flags taken under the thread lock */
	int sig;
#if defined(DEV_NPX) && !defined(SMP)
	int ucode;
	ksiginfo_t ksi;
#endif

	td = curthread;
	p = td->td_proc;

	CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid,
            p->p_comm);
	/* Sanity checks: user-mode frame, no Giant, no thread lock held. */
	KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
	WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode");
	mtx_assert(&Giant, MA_NOTOWNED);
	THREAD_LOCK_ASSERT(td, MA_NOTOWNED);
	td->td_frame = framep;
	/* Reset the tick count that userret() charges when profiling. */
	td->td_pticks = 0;

	/*
	 * This updates the td_flag's for the checks below in one
	 * "atomic" operation with turning off the astpending flag.
	 * If another AST is triggered while we are handling the
	 * AST's saved in flags, the astpending flag will be set and
	 * ast() will be called again.
	 */
	thread_lock(td);
	flags = td->td_flags;
	td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK |
	    TDF_NEEDRESCHED | TDF_ALRMPEND | TDF_PROFPEND | TDF_MACPEND);
	thread_unlock(td);
	PCPU_INC(cnt.v_trap);

	/*
	 * The thread's credential pointer no longer matches the process's;
	 * let cred_update_thread() deal with it.
	 */
	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);
	/*
	 * A profiling charge was deferred (TDP_OWEUPC): deliver the ticks
	 * saved in td_profil_ticks for td_profil_addr, then clear the debt.
	 */
	if (td->td_pflags & TDP_OWEUPC && p->p_flag & P_PROFIL) {
		addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
		td->td_profil_ticks = 0;
		td->td_pflags &= ~TDP_OWEUPC;
	}
	/* Pending TDF_ALRMPEND: post SIGVTALRM to the process. */
	if (flags & TDF_ALRMPEND) {
		PROC_LOCK(p);
		psignal(p, SIGVTALRM);
		PROC_UNLOCK(p);
	}
#if defined(DEV_NPX) && !defined(SMP)
	/*
	 * PCB_NPXTRAP is set in the current pcb: clear it, ask npxtrap()
	 * for a code and, unless it returns -1, raise SIGFPE with that
	 * code on this thread.
	 */
	if (PCPU_GET(curpcb)->pcb_flags & PCB_NPXTRAP) {
		atomic_clear_int(&PCPU_GET(curpcb)->pcb_flags,
		    PCB_NPXTRAP);
		ucode = npxtrap();
		if (ucode != -1) {
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGFPE;
			ksi.ksi_code = ucode;
			trapsignal(td, &ksi);
		}
	}
#endif
	/* Pending TDF_PROFPEND: post SIGPROF to the process. */
	if (flags & TDF_PROFPEND) {
		PROC_LOCK(p);
		psignal(p, SIGPROF);
		PROC_UNLOCK(p);
	}
#ifdef MAC
	/* The MAC framework asked to be called on this return to user. */
	if (flags & TDF_MACPEND)
		mac_thread_userret(td);
#endif
	/*
	 * A reschedule was requested: drop to the user priority and make an
	 * involuntary context switch, bracketed by ktrace CSW records when
	 * that trace point is enabled.
	 */
	if (flags & TDF_NEEDRESCHED) {
#ifdef KTRACE
		if (KTRPOINT(td, KTR_CSW))
			ktrcsw(1, 1);
#endif
		thread_lock(td);
		sched_prio(td, td->td_user_pri);
		mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
		thread_unlock(td);
#ifdef KTRACE
		if (KTRPOINT(td, KTR_CSW))
			ktrcsw(0, 1);
#endif
	}

	/*
	 * Check for signals. Unlocked reads of p_pendingcnt or
	 * p_siglist might cause process-directed signal to be handled
	 * later.
	 */
	if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
	    !SIGISEMPTY(p->p_siglist)) {
		PROC_LOCK(p);
		mtx_lock(&p->p_sigacts->ps_mtx);
		/* Post every signal cursig() reports until it returns 0. */
		while ((sig = cursig(td, SIG_STOP_ALLOWED)) != 0)
			postsig(sig);
		mtx_unlock(&p->p_sigacts->ps_mtx);
		PROC_UNLOCK(p);
	}
	/*
	 * We need to check to see if we have to exit or wait due to a
	 * single threading requirement or some other STOP condition.
	 */
	if (flags & TDF_NEEDSUSPCHK) {
		PROC_LOCK(p);
		thread_suspend_check(0);
		PROC_UNLOCK(p);
	}

	/*
	 * TDP_OLDMASK means a signal mask was stashed in td_oldsigmask;
	 * clear the flag and reinstall that mask with SIG_SETMASK.
	 */
	if (td->td_pflags & TDP_OLDMASK) {
		td->td_pflags &= ~TDP_OLDMASK;
		kern_sigprocmask(td, SIG_SETMASK, &td->td_oldsigmask, NULL, 0);
	}

	userret(td, framep);
	mtx_assert(&Giant, MA_NOTOWNED);
}
261 
262 #ifdef HAVE_SYSCALL_ARGS_DEF
263 const char *
264 syscallname(struct proc *p, u_int code)
265 {
266 	static const char unknown[] = "unknown";
267 
268 	if (p->p_sysent->sv_syscallnames == NULL)
269 		return (unknown);
270 	return (p->p_sysent->sv_syscallnames[code]);
271 }
272 
/*
 * Machine-independent system call entry: fetch the call number and
 * arguments through the ABI's sv_fetch_syscall_args hook, notify the
 * tracing facilities (ktrace, procfs stop events, ptrace, DTrace systrace,
 * audit), invoke the handler and store the result through the ABI's
 * sv_set_syscall_retval hook.
 *
 * td is the calling thread and sa receives the decoded call (code, narg,
 * args, callp).  Returns the error from the argument fetch if that
 * failed, otherwise the handler's return value; the same value is passed
 * to sv_set_syscall_retval and, when the handler ran, saved in td_errno.
 */
int
syscallenter(struct thread *td, struct syscall_args *sa)
{
	struct proc *p;
	int error, traced;

	PCPU_INC(cnt.v_syscall);
	p = td->td_proc;
	td->td_syscalls++;

	/* Reset the tick count that userret() charges when profiling. */
	td->td_pticks = 0;
	/*
	 * The thread's credential pointer no longer matches the process's;
	 * let cred_update_thread() deal with it.
	 */
	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);
	/*
	 * P_TRACED is sampled once here; 'traced' decides at the end
	 * whether TDB_SCE has to be cleared again.
	 */
	if (p->p_flag & P_TRACED) {
		traced = 1;
		PROC_LOCK(p);
		td->td_dbgflags &= ~TDB_USERWR;
		td->td_dbgflags |= TDB_SCE;
		PROC_UNLOCK(p);
	} else
		traced = 0;
	error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
	/*
	 * NOTE(review): the ktrace record and the KTR trace below are
	 * emitted before 'error' is examined, so they read sa->code and
	 * sa->args even when the fetch failed -- confirm the fetch hook
	 * leaves *sa in a usable state on error.
	 */
#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL))
		ktrsyscall(sa->code, sa->narg, sa->args);
#endif

	CTR6(KTR_SYSC,
"syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)",
	    td, td->td_proc->p_pid, syscallname(p, sa->code),
	    sa->args[0], sa->args[1], sa->args[2]);

	if (error == 0) {
		/* Syscall-entry stop points for procfs and ptrace. */
		STOPEVENT(p, S_SCE, sa->narg);
		PTRACESTOP_SC(p, td, S_PT_SCE);
		if (td->td_dbgflags & TDB_USERWR) {
			/*
			 * Reread syscall number and arguments if
			 * debugger modified registers or memory.
			 */
			error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
#ifdef KTRACE
			if (KTRPOINT(td, KTR_SYSCALL))
				ktrsyscall(sa->code, sa->narg, sa->args);
#endif
			/* A failed refetch skips the handler entirely. */
			if (error != 0)
				goto retval;
		}

#ifdef KDTRACE_HOOKS
		/*
		 * If the systrace module has registered its probe
		 * callback and if there is a probe active for the
		 * syscall 'entry', process the probe.
		 */
		if (systrace_probe_func != NULL && sa->callp->sy_entry != 0)
			(*systrace_probe_func)(sa->callp->sy_entry, sa->code,
			    sa->callp, sa->args);
#endif

		/* Run the handler itself, bracketed by the audit hooks. */
		AUDIT_SYSCALL_ENTER(sa->code, td);
		error = (sa->callp->sy_call)(td, sa->args);
		AUDIT_SYSCALL_EXIT(error, td);

		/* Save the latest error return value. */
		td->td_errno = error;

#ifdef KDTRACE_HOOKS
		/*
		 * If the systrace module has registered its probe
		 * callback and if there is a probe active for the
		 * syscall 'return', process the probe.
		 */
		if (systrace_probe_func != NULL && sa->callp->sy_return != 0)
			(*systrace_probe_func)(sa->callp->sy_return, sa->code,
			    sa->callp, sa->args);
#endif
		CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx",
		    p, error, td->td_retval[0], td->td_retval[1]);
	}
 retval:
	/* Undo the TDB_SCE marking made at entry for a traced process. */
	if (traced) {
		PROC_LOCK(p);
		td->td_dbgflags &= ~TDB_SCE;
		PROC_UNLOCK(p);
	}
	/* Let the ABI place the result where userland expects it. */
	(p->p_sysent->sv_set_syscall_retval)(td, error);
	return (error);
}
362 
363 void
364 syscallret(struct thread *td, int error, struct syscall_args *sa __unused)
365 {
366 	struct proc *p;
367 	int traced;
368 
369 	p = td->td_proc;
370 
371 	/*
372 	 * Check for misbehavior.
373 	 */
374 	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
375 	    syscallname(p, sa->code));
376 	KASSERT(td->td_critnest == 0,
377 	    ("System call %s returning in a critical section",
378 	    syscallname(p, sa->code)));
379 	KASSERT(td->td_locks == 0,
380 	    ("System call %s returning with %d locks held",
381 	     syscallname(p, sa->code), td->td_locks));
382 
383 	/*
384 	 * Handle reschedule and other end-of-syscall issues
385 	 */
386 	userret(td, td->td_frame);
387 
388 	CTR4(KTR_SYSC, "syscall %s exit thread %p pid %d proc %s",
389 	    syscallname(p, sa->code), td, td->td_proc->p_pid, td->td_name);
390 
391 #ifdef KTRACE
392 	if (KTRPOINT(td, KTR_SYSRET))
393 		ktrsysret(sa->code, error, td->td_retval[0]);
394 #endif
395 
396 	if (p->p_flag & P_TRACED) {
397 		traced = 1;
398 		PROC_LOCK(p);
399 		td->td_dbgflags |= TDB_SCX;
400 		PROC_UNLOCK(p);
401 	} else
402 		traced = 0;
403 	/*
404 	 * This works because errno is findable through the
405 	 * register set.  If we ever support an emulation where this
406 	 * is not the case, this code will need to be revisited.
407 	 */
408 	STOPEVENT(p, S_SCX, sa->code);
409 	PTRACESTOP_SC(p, td, S_PT_SCX);
410 	if (traced || (td->td_dbgflags & TDB_EXEC) != 0) {
411 		PROC_LOCK(p);
412 		td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC);
413 		PROC_UNLOCK(p);
414 	}
415 }
416 #endif /* HAVE_SYSCALL_ARGS_DEF */
417