xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 67d39748499e85cff626c202aa2cb6e9f180283e)
1d66a5066SPeter Wemm /*-
29a14aa01SUlrich Spörlein  * Copyright (c) 1994-1996 Søren Schmidt
3d66a5066SPeter Wemm  * All rights reserved.
4d66a5066SPeter Wemm  *
5d66a5066SPeter Wemm  * Redistribution and use in source and binary forms, with or without
6d66a5066SPeter Wemm  * modification, are permitted provided that the following conditions
7d66a5066SPeter Wemm  * are met:
8d66a5066SPeter Wemm  * 1. Redistributions of source code must retain the above copyright
9d66a5066SPeter Wemm  *    notice, this list of conditions and the following disclaimer
10d66a5066SPeter Wemm  *    in this position and unchanged.
11d66a5066SPeter Wemm  * 2. Redistributions in binary form must reproduce the above copyright
12d66a5066SPeter Wemm  *    notice, this list of conditions and the following disclaimer in the
13d66a5066SPeter Wemm  *    documentation and/or other materials provided with the distribution.
14d66a5066SPeter Wemm  * 3. The name of the author may not be used to endorse or promote products
1521dc7d4fSJens Schweikhardt  *    derived from this software without specific prior written permission
16d66a5066SPeter Wemm  *
17d66a5066SPeter Wemm  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18d66a5066SPeter Wemm  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19d66a5066SPeter Wemm  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20d66a5066SPeter Wemm  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21d66a5066SPeter Wemm  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22d66a5066SPeter Wemm  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23d66a5066SPeter Wemm  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24d66a5066SPeter Wemm  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25d66a5066SPeter Wemm  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26d66a5066SPeter Wemm  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27d66a5066SPeter Wemm  */
28d66a5066SPeter Wemm 
2927e0099cSDavid E. O'Brien #include <sys/cdefs.h>
3027e0099cSDavid E. O'Brien __FBSDID("$FreeBSD$");
3127e0099cSDavid E. O'Brien 
32d66a5066SPeter Wemm #include <sys/param.h>
3375f83872SPeter Wemm #include <sys/systm.h>
34ff22c670SBruce Evans #include <sys/exec.h>
3557b4252eSKonstantin Belousov #include <sys/fcntl.h>
36d66a5066SPeter Wemm #include <sys/imgact.h>
3722d4b0fbSJohn Polstra #include <sys/imgact_aout.h>
38e1743d02SSøren Schmidt #include <sys/imgact_elf.h>
39ff22c670SBruce Evans #include <sys/kernel.h>
407106ca0dSJohn Baldwin #include <sys/lock.h>
41e1743d02SSøren Schmidt #include <sys/malloc.h>
42ff22c670SBruce Evans #include <sys/module.h>
4323955314SAlfred Perlstein #include <sys/mutex.h>
44fb919e4dSMark Murray #include <sys/proc.h>
45fb919e4dSMark Murray #include <sys/signalvar.h>
46206a5d3aSIan Dowse #include <sys/syscallsubr.h>
47*67d39748SDmitry Chagin #include <sys/sysctl.h>
48fb919e4dSMark Murray #include <sys/sysent.h>
49fb919e4dSMark Murray #include <sys/sysproto.h>
50a9148ab1SPeter Wemm #include <sys/vnode.h>
519b44bfc5SAlexander Leidinger #include <sys/eventhandler.h>
52fb919e4dSMark Murray 
53d66a5066SPeter Wemm #include <vm/vm.h>
54a9148ab1SPeter Wemm #include <vm/pmap.h>
55ff22c670SBruce Evans #include <vm/vm_extern.h>
56a9148ab1SPeter Wemm #include <vm/vm_map.h>
57a9148ab1SPeter Wemm #include <vm/vm_object.h>
58ff22c670SBruce Evans #include <vm/vm_page.h>
59ff22c670SBruce Evans #include <vm/vm_param.h>
60ff22c670SBruce Evans 
61ff22c670SBruce Evans #include <machine/cpu.h>
624d7c2e8aSDmitry Chagin #include <machine/cputypes.h>
63ff22c670SBruce Evans #include <machine/md_var.h>
64d3adf769SDavid Schultz #include <machine/pcb.h>
65a9148ab1SPeter Wemm 
66d66a5066SPeter Wemm #include <i386/linux/linux.h>
67ebea8660SMarcel Moolenaar #include <i386/linux/linux_proto.h>
6894cb2ecfSAlexander Leidinger #include <compat/linux/linux_emul.h>
69fde63162SDmitry Chagin #include <compat/linux/linux_futex.h>
70d825ce0aSJohn Baldwin #include <compat/linux/linux_ioctl.h>
710f9d6538SJohn Baldwin #include <compat/linux/linux_mib.h>
724d7c2e8aSDmitry Chagin #include <compat/linux/linux_misc.h>
73b595ab37SAndrew Gallatin #include <compat/linux/linux_signal.h>
74322bfdc3SMarcel Moolenaar #include <compat/linux/linux_util.h>
75bdc37934SDmitry Chagin #include <compat/linux/linux_vdso.h>
76e1743d02SSøren Schmidt 
771d91482dSPeter Wemm MODULE_VERSION(linux, 1);
781d91482dSPeter Wemm 
79d323ddf3SMatthew Dillon #if BYTE_ORDER == LITTLE_ENDIAN
80d323ddf3SMatthew Dillon #define SHELLMAGIC      0x2123 /* #! */
81d323ddf3SMatthew Dillon #else
82d323ddf3SMatthew Dillon #define SHELLMAGIC      0x2321
83d323ddf3SMatthew Dillon #endif
84d323ddf3SMatthew Dillon 
85*67d39748SDmitry Chagin #if defined(DEBUG)
86*67d39748SDmitry Chagin SYSCTL_PROC(_compat_linux, OID_AUTO, debug,
87*67d39748SDmitry Chagin             CTLTYPE_STRING | CTLFLAG_RW,
88*67d39748SDmitry Chagin             0, 0, linux_sysctl_debug, "A",
89*67d39748SDmitry Chagin             "Linux debugging control");
90*67d39748SDmitry Chagin #endif
91*67d39748SDmitry Chagin 
92e061a6caSMarcel Moolenaar /*
93e061a6caSMarcel Moolenaar  * Allow the sendsig functions to use the ldebug() facility
94e061a6caSMarcel Moolenaar  * even though they are not syscalls themselves. Map them
95e061a6caSMarcel Moolenaar  * to syscall 0. This is slightly less bogus than using
96e061a6caSMarcel Moolenaar  * ldebug(sigreturn).
97e061a6caSMarcel Moolenaar  */
98e061a6caSMarcel Moolenaar #define	LINUX_SYS_linux_rt_sendsig	0
99e061a6caSMarcel Moolenaar #define	LINUX_SYS_linux_sendsig		0
100e061a6caSMarcel Moolenaar 
1018f1e49a6SDmitry Chagin #define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))
1028f1e49a6SDmitry Chagin 
103bdc37934SDmitry Chagin static int linux_szsigcode;
104bdc37934SDmitry Chagin static vm_object_t linux_shared_page_obj;
105bdc37934SDmitry Chagin static char *linux_shared_page_mapping;
106bdc37934SDmitry Chagin extern char _binary_linux_locore_o_start;
107bdc37934SDmitry Chagin extern char _binary_linux_locore_o_end;
10843bef515SMarcel Moolenaar 
10943bef515SMarcel Moolenaar extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
11043bef515SMarcel Moolenaar 
111f41325dbSPeter Wemm SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
11243bef515SMarcel Moolenaar 
11389c9a483SAlfred Perlstein static int	linux_fixup(register_t **stack_base,
11489c9a483SAlfred Perlstein 		    struct image_params *iparams);
11589c9a483SAlfred Perlstein static int	elf_linux_fixup(register_t **stack_base,
11689c9a483SAlfred Perlstein 		    struct image_params *iparams);
1179104847fSDavid Xu static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
118a107d8aaSNathan Whitehorn static void	exec_linux_setregs(struct thread *td,
119a107d8aaSNathan Whitehorn 		    struct image_params *imgp, u_long stack);
1204d7c2e8aSDmitry Chagin static register_t *linux_copyout_strings(struct image_params *imgp);
12189ffc202SBjoern A. Zeeb static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
122bdc37934SDmitry Chagin static void	linux_vdso_install(void *param);
123bdc37934SDmitry Chagin static void	linux_vdso_deinstall(void *param);
1244d7c2e8aSDmitry Chagin 
1254d7c2e8aSDmitry Chagin static int linux_szplatform;
1260020bdf1SDmitry Chagin const char *linux_kplatform;
127d66a5066SPeter Wemm 
1289b44bfc5SAlexander Leidinger static eventhandler_tag linux_exit_tag;
1299b44bfc5SAlexander Leidinger static eventhandler_tag linux_exec_tag;
13081338031SDmitry Chagin static eventhandler_tag linux_thread_dtor_tag;
1319b44bfc5SAlexander Leidinger 
132d66a5066SPeter Wemm /*
133d66a5066SPeter Wemm  * Linux syscalls return negative errno's, we do positive and map them
13450e422f0SAlexander Leidinger  * Reference:
13550e422f0SAlexander Leidinger  *   FreeBSD: src/sys/sys/errno.h
13650e422f0SAlexander Leidinger  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
13750e422f0SAlexander Leidinger  *            linux-2.6.17.8/include/asm-generic/errno.h
138d66a5066SPeter Wemm  */
13985f118c8SDmitrij Tejblum static int bsd_to_linux_errno[ELAST + 1] = {
140d66a5066SPeter Wemm 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
141d66a5066SPeter Wemm 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
142d66a5066SPeter Wemm 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
143d66a5066SPeter Wemm 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
144d66a5066SPeter Wemm 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
145d66a5066SPeter Wemm 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
146d66a5066SPeter Wemm 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
147d66a5066SPeter Wemm 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
14850e422f0SAlexander Leidinger 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
14950e422f0SAlexander Leidinger 	 -72, -67, -71
150d66a5066SPeter Wemm };
151d66a5066SPeter Wemm 
152956d3333SMarcel Moolenaar int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
153956d3333SMarcel Moolenaar 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
154956d3333SMarcel Moolenaar 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
155ba873f4cSAlexander Kabaev 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
156956d3333SMarcel Moolenaar 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
157956d3333SMarcel Moolenaar 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
158956d3333SMarcel Moolenaar 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
159956d3333SMarcel Moolenaar 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
160956d3333SMarcel Moolenaar 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
161d66a5066SPeter Wemm };
162d66a5066SPeter Wemm 
163956d3333SMarcel Moolenaar int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
164956d3333SMarcel Moolenaar 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
165956d3333SMarcel Moolenaar 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
166956d3333SMarcel Moolenaar 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
167956d3333SMarcel Moolenaar 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
168956d3333SMarcel Moolenaar 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
169956d3333SMarcel Moolenaar 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
170956d3333SMarcel Moolenaar 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
171ba873f4cSAlexander Kabaev 	SIGIO, SIGURG, SIGSYS
172d66a5066SPeter Wemm };
173d66a5066SPeter Wemm 
/* Value reported when a FreeBSD trap code has no entry in the table. */
#define	LINUX_T_UNKNOWN	255

/*
 * Linux trap number for each FreeBSD trap code, indexed by the FreeBSD
 * code.  Slots without a named FreeBSD trap hold LINUX_T_UNKNOWN.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0 */		LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT */	6,
	/*  2 */		LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT */	3,
	/*  4 */		LINUX_T_UNKNOWN,
	/*  5 */		LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP */	16,
	/*  7 T_ASTFLT */	254,
	/*  8 */		LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT */	13,
	/* 10 T_TRCTRAP */	1,
	/* 11 */		LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT */	14,
	/* 13 */		LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT */	17,
	/* 15 */		LINUX_T_UNKNOWN,
	/* 16 */		LINUX_T_UNKNOWN,
	/* 17 */		LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE */	0,
	/* 19 T_NMI */		2,
	/* 20 T_OFLOW */	4,
	/* 21 T_BOUND */	5,
	/* 22 T_DNA */		7,
	/* 23 T_DOUBLEFLT */	8,
	/* 24 T_FPOPFLT */	9,
	/* 25 T_TSSFLT */	10,
	/* 26 T_SEGNPFLT */	11,
	/* 27 T_STKFLT */	12,
	/* 28 T_MCHK */		18,
	/* 29 T_XMMFLT */	19,
	/* 30 T_RESERVED */	15
};

/*
 * Bounds-checked lookup in the table above; any code outside the table
 * yields LINUX_T_UNKNOWN.  The argument is evaluated more than once.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)

/*
 * Symbols declared through LINUX_VDSO_SYM_INTPTR.  In this file
 * linux_rt_sigcode is loaded into %eip when a signal handler is entered
 * and linux_vsyscall is exported to the process as AT_SYSINFO.
 */
LINUX_VDSO_SYM_INTPTR(linux_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_vsyscall);
216bdc37934SDmitry Chagin 
217288078beSEivind Eklund /*
218288078beSEivind Eklund  * If FreeBSD & Linux have a difference of opinion about what a trap
219288078beSEivind Eklund  * means, deal with it here.
220356861dbSMatthew Dillon  *
221356861dbSMatthew Dillon  * MPSAFE
222288078beSEivind Eklund  */
223288078beSEivind Eklund static int
224288078beSEivind Eklund translate_traps(int signal, int trap_code)
225288078beSEivind Eklund {
226d563a53aSEivind Eklund 	if (signal != SIGBUS)
227af682d48SDmitry Chagin 		return (signal);
228288078beSEivind Eklund 	switch (trap_code) {
229288078beSEivind Eklund 	case T_PROTFLT:
230288078beSEivind Eklund 	case T_TSSFLT:
231288078beSEivind Eklund 	case T_DOUBLEFLT:
232288078beSEivind Eklund 	case T_PAGEFLT:
233af682d48SDmitry Chagin 		return (SIGSEGV);
234288078beSEivind Eklund 	default:
235af682d48SDmitry Chagin 		return (signal);
236288078beSEivind Eklund 	}
237288078beSEivind Eklund }
238288078beSEivind Eklund 
239303b270bSEivind Eklund static int
240654f6be1SBruce Evans linux_fixup(register_t **stack_base, struct image_params *imgp)
241d66a5066SPeter Wemm {
242654f6be1SBruce Evans 	register_t *argv, *envp;
243d66a5066SPeter Wemm 
244d66a5066SPeter Wemm 	argv = *stack_base;
245610ecfe0SMaxim Sobolev 	envp = *stack_base + (imgp->args->argc + 1);
246d66a5066SPeter Wemm 	(*stack_base)--;
247aa103453SKonstantin Belousov 	suword(*stack_base, (intptr_t)(void *)envp);
248d66a5066SPeter Wemm 	(*stack_base)--;
249aa103453SKonstantin Belousov 	suword(*stack_base, (intptr_t)(void *)argv);
250d66a5066SPeter Wemm 	(*stack_base)--;
251aa103453SKonstantin Belousov 	suword(*stack_base, imgp->args->argc);
2524d7c2e8aSDmitry Chagin 	return (0);
253d66a5066SPeter Wemm }
254d66a5066SPeter Wemm 
255303b270bSEivind Eklund static int
256654f6be1SBruce Evans elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
257e1743d02SSøren Schmidt {
2584d7c2e8aSDmitry Chagin 	struct proc *p;
25943cf129cSJohn Baldwin 	Elf32_Auxargs *args;
2604d7c2e8aSDmitry Chagin 	Elf32_Addr *uplatform;
2614d7c2e8aSDmitry Chagin 	struct ps_strings *arginfo;
262654f6be1SBruce Evans 	register_t *pos;
263d66a5066SPeter Wemm 
2646617724cSJeff Roberson 	KASSERT(curthread->td_proc == imgp->proc,
26543cf129cSJohn Baldwin 	    ("unsafe elf_linux_fixup(), should be curproc"));
2664d7c2e8aSDmitry Chagin 
2674d7c2e8aSDmitry Chagin 	p = imgp->proc;
2684d7c2e8aSDmitry Chagin 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
269acface68SDmitry Chagin 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
27043cf129cSJohn Baldwin 	args = (Elf32_Auxargs *)imgp->auxargs;
271610ecfe0SMaxim Sobolev 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
272e1743d02SSøren Schmidt 
273bdc37934SDmitry Chagin 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
274bdc37934SDmitry Chagin 	    imgp->proc->p_sysent->sv_shared_page_base);
275bdc37934SDmitry Chagin 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall);
2764d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
2778d30f381SDmitry Chagin 
2788d30f381SDmitry Chagin 	/*
2798d30f381SDmitry Chagin 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
2808d30f381SDmitry Chagin 	 * as it has appeared in the 2.4.0-rc7 first time.
2818d30f381SDmitry Chagin 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
2828d30f381SDmitry Chagin 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
2838d30f381SDmitry Chagin 	 * is not present.
2848d30f381SDmitry Chagin 	 * Also see linux_times() implementation.
2858d30f381SDmitry Chagin 	 */
2868d30f381SDmitry Chagin 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
2871ca16454SDmitry Chagin 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
288e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
289e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
290e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
291e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
292e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
293e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
294e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
2954d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
296b1fc0ec1SRobert Watson 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
297b1fc0ec1SRobert Watson 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
298b1fc0ec1SRobert Watson 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
299b1fc0ec1SRobert Watson 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
3004d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
3014d7c2e8aSDmitry Chagin 	if (args->execfd != -1)
3024d7c2e8aSDmitry Chagin 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
303e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_NULL, 0);
304e1743d02SSøren Schmidt 
305e1743d02SSøren Schmidt 	free(imgp->auxargs, M_TEMP);
306e1743d02SSøren Schmidt 	imgp->auxargs = NULL;
307e1743d02SSøren Schmidt 
308e1743d02SSøren Schmidt 	(*stack_base)--;
309aa103453SKonstantin Belousov 	suword(*stack_base, (register_t)imgp->args->argc);
3104d7c2e8aSDmitry Chagin 	return (0);
311e1743d02SSøren Schmidt }
312d66a5066SPeter Wemm 
3134d7c2e8aSDmitry Chagin /*
3144d7c2e8aSDmitry Chagin  * Copied from kern/kern_exec.c
3154d7c2e8aSDmitry Chagin  */
3164d7c2e8aSDmitry Chagin static register_t *
3174d7c2e8aSDmitry Chagin linux_copyout_strings(struct image_params *imgp)
3184d7c2e8aSDmitry Chagin {
3194d7c2e8aSDmitry Chagin 	int argc, envc;
3204d7c2e8aSDmitry Chagin 	char **vectp;
3214d7c2e8aSDmitry Chagin 	char *stringp, *destp;
3224d7c2e8aSDmitry Chagin 	register_t *stack_base;
3234d7c2e8aSDmitry Chagin 	struct ps_strings *arginfo;
3244d7c2e8aSDmitry Chagin 	struct proc *p;
3254d7c2e8aSDmitry Chagin 
3264d7c2e8aSDmitry Chagin 	/*
3274d7c2e8aSDmitry Chagin 	 * Calculate string base and vector table pointers.
3284d7c2e8aSDmitry Chagin 	 */
3294d7c2e8aSDmitry Chagin 	p = imgp->proc;
3304d7c2e8aSDmitry Chagin 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
3318f1e49a6SDmitry Chagin 	destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
3328f1e49a6SDmitry Chagin 	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
3334d7c2e8aSDmitry Chagin 
3344d7c2e8aSDmitry Chagin 	/*
3354d7c2e8aSDmitry Chagin 	 * install LINUX_PLATFORM
3364d7c2e8aSDmitry Chagin 	 */
3370020bdf1SDmitry Chagin 	copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform),
3388f1e49a6SDmitry Chagin 	    linux_szplatform);
3394d7c2e8aSDmitry Chagin 
3404d7c2e8aSDmitry Chagin 	/*
3414d7c2e8aSDmitry Chagin 	 * If we have a valid auxargs ptr, prepare some room
3424d7c2e8aSDmitry Chagin 	 * on the stack.
3434d7c2e8aSDmitry Chagin 	 */
3444d7c2e8aSDmitry Chagin 	if (imgp->auxargs) {
3454d7c2e8aSDmitry Chagin 		/*
3464d7c2e8aSDmitry Chagin 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
3474d7c2e8aSDmitry Chagin 		 * lower compatibility.
3484d7c2e8aSDmitry Chagin 		 */
3494d7c2e8aSDmitry Chagin 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
3504d7c2e8aSDmitry Chagin 		    (LINUX_AT_COUNT * 2);
3514d7c2e8aSDmitry Chagin 		/*
3524d7c2e8aSDmitry Chagin 		 * The '+ 2' is for the null pointers at the end of each of
3534d7c2e8aSDmitry Chagin 		 * the arg and env vector sets,and imgp->auxarg_size is room
3544d7c2e8aSDmitry Chagin 		 * for argument of Runtime loader.
3554d7c2e8aSDmitry Chagin 		 */
3564d7c2e8aSDmitry Chagin 		vectp = (char **)(destp - (imgp->args->argc +
3574d7c2e8aSDmitry Chagin 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
3584d7c2e8aSDmitry Chagin 	} else {
3594d7c2e8aSDmitry Chagin 		/*
3604d7c2e8aSDmitry Chagin 		 * The '+ 2' is for the null pointers at the end of each of
3614d7c2e8aSDmitry Chagin 		 * the arg and env vector sets
3624d7c2e8aSDmitry Chagin 		 */
3634d7c2e8aSDmitry Chagin 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
3644d7c2e8aSDmitry Chagin 		    sizeof(char *));
3654d7c2e8aSDmitry Chagin 	}
3664d7c2e8aSDmitry Chagin 
3674d7c2e8aSDmitry Chagin 	/*
3684d7c2e8aSDmitry Chagin 	 * vectp also becomes our initial stack base
3694d7c2e8aSDmitry Chagin 	 */
3704d7c2e8aSDmitry Chagin 	stack_base = (register_t *)vectp;
3714d7c2e8aSDmitry Chagin 
3724d7c2e8aSDmitry Chagin 	stringp = imgp->args->begin_argv;
3734d7c2e8aSDmitry Chagin 	argc = imgp->args->argc;
3744d7c2e8aSDmitry Chagin 	envc = imgp->args->envc;
3754d7c2e8aSDmitry Chagin 
3764d7c2e8aSDmitry Chagin 	/*
3774d7c2e8aSDmitry Chagin 	 * Copy out strings - arguments and environment.
3784d7c2e8aSDmitry Chagin 	 */
3794d7c2e8aSDmitry Chagin 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
3804d7c2e8aSDmitry Chagin 
3814d7c2e8aSDmitry Chagin 	/*
3824d7c2e8aSDmitry Chagin 	 * Fill in "ps_strings" struct for ps, w, etc.
3834d7c2e8aSDmitry Chagin 	 */
3844d7c2e8aSDmitry Chagin 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
3854d7c2e8aSDmitry Chagin 	suword(&arginfo->ps_nargvstr, argc);
3864d7c2e8aSDmitry Chagin 
3874d7c2e8aSDmitry Chagin 	/*
3884d7c2e8aSDmitry Chagin 	 * Fill in argument portion of vector table.
3894d7c2e8aSDmitry Chagin 	 */
3904d7c2e8aSDmitry Chagin 	for (; argc > 0; --argc) {
3914d7c2e8aSDmitry Chagin 		suword(vectp++, (long)(intptr_t)destp);
3924d7c2e8aSDmitry Chagin 		while (*stringp++ != 0)
3934d7c2e8aSDmitry Chagin 			destp++;
3944d7c2e8aSDmitry Chagin 		destp++;
3954d7c2e8aSDmitry Chagin 	}
3964d7c2e8aSDmitry Chagin 
3974d7c2e8aSDmitry Chagin 	/* a null vector table pointer separates the argp's from the envp's */
3984d7c2e8aSDmitry Chagin 	suword(vectp++, 0);
3994d7c2e8aSDmitry Chagin 
4004d7c2e8aSDmitry Chagin 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
4014d7c2e8aSDmitry Chagin 	suword(&arginfo->ps_nenvstr, envc);
4024d7c2e8aSDmitry Chagin 
4034d7c2e8aSDmitry Chagin 	/*
4044d7c2e8aSDmitry Chagin 	 * Fill in environment portion of vector table.
4054d7c2e8aSDmitry Chagin 	 */
4064d7c2e8aSDmitry Chagin 	for (; envc > 0; --envc) {
4074d7c2e8aSDmitry Chagin 		suword(vectp++, (long)(intptr_t)destp);
4084d7c2e8aSDmitry Chagin 		while (*stringp++ != 0)
4094d7c2e8aSDmitry Chagin 			destp++;
4104d7c2e8aSDmitry Chagin 		destp++;
4114d7c2e8aSDmitry Chagin 	}
4124d7c2e8aSDmitry Chagin 
4134d7c2e8aSDmitry Chagin 	/* end of vector table is a null pointer */
4144d7c2e8aSDmitry Chagin 	suword(vectp, 0);
4154d7c2e8aSDmitry Chagin 
4164d7c2e8aSDmitry Chagin 	return (stack_base);
4174d7c2e8aSDmitry Chagin }
4184d7c2e8aSDmitry Chagin 
41979363394SAndrew Gallatin static void
4209104847fSDavid Xu linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
42179363394SAndrew Gallatin {
4221d062e2bSDag-Erling Smørgrav 	struct thread *td = curthread;
4231d062e2bSDag-Erling Smørgrav 	struct proc *p = td->td_proc;
42490af4afaSJohn Baldwin 	struct sigacts *psp;
4251d062e2bSDag-Erling Smørgrav 	struct trapframe *regs;
4265002a60fSMarcel Moolenaar 	struct l_rt_sigframe *fp, frame;
4279104847fSDavid Xu 	int sig, code;
42879363394SAndrew Gallatin 	int oonstack;
42979363394SAndrew Gallatin 
4309104847fSDavid Xu 	sig = ksi->ksi_signo;
4319104847fSDavid Xu 	code = ksi->ksi_code;
432df53e91cSJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
43390af4afaSJohn Baldwin 	psp = p->p_sigacts;
43490af4afaSJohn Baldwin 	mtx_assert(&psp->ps_mtx, MA_OWNED);
435b40ce416SJulian Elischer 	regs = td->td_frame;
436d034d459SMarcel Moolenaar 	oonstack = sigonstack(regs->tf_esp);
43779363394SAndrew Gallatin 
43879363394SAndrew Gallatin #ifdef DEBUG
4395002a60fSMarcel Moolenaar 	if (ldebug(rt_sendsig))
440728ef954SJohn Baldwin 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
44124593369SJonathan Lemon 		    catcher, sig, (void*)mask, code);
44279363394SAndrew Gallatin #endif
44379363394SAndrew Gallatin 	/*
44479363394SAndrew Gallatin 	 * Allocate space for the signal handler context.
44579363394SAndrew Gallatin 	 */
446a30ec4b9SDavid Xu 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
44790af4afaSJohn Baldwin 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
448a30ec4b9SDavid Xu 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
449a30ec4b9SDavid Xu 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
450d034d459SMarcel Moolenaar 	} else
4515002a60fSMarcel Moolenaar 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
45290af4afaSJohn Baldwin 	mtx_unlock(&psp->ps_mtx);
45379363394SAndrew Gallatin 
45479363394SAndrew Gallatin 	/*
45579363394SAndrew Gallatin 	 * Build the argument list for the signal handler.
45679363394SAndrew Gallatin 	 */
45779363394SAndrew Gallatin 	if (p->p_sysent->sv_sigtbl)
45879363394SAndrew Gallatin 		if (sig <= p->p_sysent->sv_sigsize)
45979363394SAndrew Gallatin 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
46079363394SAndrew Gallatin 
46199d45c5fSMarcel Moolenaar 	bzero(&frame, sizeof(frame));
46299d45c5fSMarcel Moolenaar 
46379363394SAndrew Gallatin 	frame.sf_handler = catcher;
46479363394SAndrew Gallatin 	frame.sf_sig = sig;
46579363394SAndrew Gallatin 	frame.sf_siginfo = &fp->sf_si;
46679363394SAndrew Gallatin 	frame.sf_ucontext = &fp->sf_sc;
467cc6ca9b3SMarcel Moolenaar 
4689d05b77dSJuli Mallett 	/* Fill in POSIX parts */
469aa8b2011SKonstantin Belousov 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
470cc6ca9b3SMarcel Moolenaar 
47179363394SAndrew Gallatin 	/*
47279363394SAndrew Gallatin 	 * Build the signal context to be used by sigreturn.
47379363394SAndrew Gallatin 	 */
474cc6ca9b3SMarcel Moolenaar 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
475cc6ca9b3SMarcel Moolenaar 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
476cc6ca9b3SMarcel Moolenaar 
477a30ec4b9SDavid Xu 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
478a30ec4b9SDavid Xu 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
479a30ec4b9SDavid Xu 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
480d034d459SMarcel Moolenaar 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
481611d9407SJohn Baldwin 	PROC_UNLOCK(p);
482cc6ca9b3SMarcel Moolenaar 
483cc6ca9b3SMarcel Moolenaar 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
484cc6ca9b3SMarcel Moolenaar 
485cc6ca9b3SMarcel Moolenaar 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
48679363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
48779363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
48879363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
48979363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
49079363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
49179363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
49279363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
49379363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
494bdc37934SDmitry Chagin 	frame.sf_sc.uc_mcontext.sc_esp    = regs->tf_esp;
49579363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
49679363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
49779363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
49879363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
49979363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
50079363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
50179363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
50279363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
50379363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
50496a2b635SKonstantin Belousov 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
50527a828fcSPierre Beyssac 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
50679363394SAndrew Gallatin 
50779363394SAndrew Gallatin #ifdef DEBUG
5085002a60fSMarcel Moolenaar 	if (ldebug(rt_sendsig))
50924593369SJonathan Lemon 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
5109b778a16SDavid Xu 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
5119b778a16SDavid Xu 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
51279363394SAndrew Gallatin #endif
51379363394SAndrew Gallatin 
51479363394SAndrew Gallatin 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
51579363394SAndrew Gallatin 		/*
51679363394SAndrew Gallatin 		 * Process has trashed its stack; give it an illegal
51779363394SAndrew Gallatin 		 * instruction to halt it in its tracks.
51879363394SAndrew Gallatin 		 */
51989734883SAlan Cox #ifdef DEBUG
52089734883SAlan Cox 		if (ldebug(rt_sendsig))
52189734883SAlan Cox 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
52289734883SAlan Cox 			    fp, oonstack);
52389734883SAlan Cox #endif
52419eb87d2SJohn Baldwin 		PROC_LOCK(p);
525b40ce416SJulian Elischer 		sigexit(td, SIGILL);
52679363394SAndrew Gallatin 	}
52779363394SAndrew Gallatin 
52879363394SAndrew Gallatin 	/*
52979363394SAndrew Gallatin 	 * Build context to run handler in.
53079363394SAndrew Gallatin 	 */
53179363394SAndrew Gallatin 	regs->tf_esp = (int)fp;
532bdc37934SDmitry Chagin 	regs->tf_eip = linux_rt_sigcode;
53322eca0bfSKonstantin Belousov 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
53479363394SAndrew Gallatin 	regs->tf_cs = _ucodesel;
53579363394SAndrew Gallatin 	regs->tf_ds = _udatasel;
53679363394SAndrew Gallatin 	regs->tf_es = _udatasel;
53779363394SAndrew Gallatin 	regs->tf_fs = _udatasel;
53879363394SAndrew Gallatin 	regs->tf_ss = _udatasel;
539df53e91cSJohn Baldwin 	PROC_LOCK(p);
54090af4afaSJohn Baldwin 	mtx_lock(&psp->ps_mtx);
54179363394SAndrew Gallatin }
54279363394SAndrew Gallatin 
543d66a5066SPeter Wemm 
544d66a5066SPeter Wemm /*
545d66a5066SPeter Wemm  * Send an interrupt to process.
546d66a5066SPeter Wemm  *
547d66a5066SPeter Wemm  * Stack is set up to allow sigcode stored
548d66a5066SPeter Wemm  * in u. to call routine, followed by kcall
549d66a5066SPeter Wemm  * to sigreturn routine below.  After sigreturn
550d66a5066SPeter Wemm  * resets the signal mask, the stack, and the
551d66a5066SPeter Wemm  * frame pointer, it returns to the user
552d66a5066SPeter Wemm  * specified pc, psl.
553d66a5066SPeter Wemm  */
554303b270bSEivind Eklund static void
5559104847fSDavid Xu linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
556d66a5066SPeter Wemm {
5571d062e2bSDag-Erling Smørgrav 	struct thread *td = curthread;
5581d062e2bSDag-Erling Smørgrav 	struct proc *p = td->td_proc;
55990af4afaSJohn Baldwin 	struct sigacts *psp;
5601d062e2bSDag-Erling Smørgrav 	struct trapframe *regs;
5615002a60fSMarcel Moolenaar 	struct l_sigframe *fp, frame;
5625002a60fSMarcel Moolenaar 	l_sigset_t lmask;
5639104847fSDavid Xu 	int sig, code;
5642c4ab9ddSAndrew Gallatin 	int oonstack, i;
565d66a5066SPeter Wemm 
5662509e6c2SJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
56790af4afaSJohn Baldwin 	psp = p->p_sigacts;
5689104847fSDavid Xu 	sig = ksi->ksi_signo;
5699104847fSDavid Xu 	code = ksi->ksi_code;
57090af4afaSJohn Baldwin 	mtx_assert(&psp->ps_mtx, MA_OWNED);
57190af4afaSJohn Baldwin 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
572cc6ca9b3SMarcel Moolenaar 		/* Signal handler installed with SA_SIGINFO. */
5739104847fSDavid Xu 		linux_rt_sendsig(catcher, ksi, mask);
574cc6ca9b3SMarcel Moolenaar 		return;
575cc6ca9b3SMarcel Moolenaar 	}
576b40ce416SJulian Elischer 	regs = td->td_frame;
577d034d459SMarcel Moolenaar 	oonstack = sigonstack(regs->tf_esp);
578d66a5066SPeter Wemm 
579d66a5066SPeter Wemm #ifdef DEBUG
5805002a60fSMarcel Moolenaar 	if (ldebug(sendsig))
581728ef954SJohn Baldwin 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
58224593369SJonathan Lemon 		    catcher, sig, (void*)mask, code);
583d66a5066SPeter Wemm #endif
58479363394SAndrew Gallatin 
585d66a5066SPeter Wemm 	/*
586d66a5066SPeter Wemm 	 * Allocate space for the signal handler context.
587d66a5066SPeter Wemm 	 */
588a30ec4b9SDavid Xu 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
58990af4afaSJohn Baldwin 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
590a30ec4b9SDavid Xu 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
591a30ec4b9SDavid Xu 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
592d034d459SMarcel Moolenaar 	} else
5935002a60fSMarcel Moolenaar 		fp = (struct l_sigframe *)regs->tf_esp - 1;
59490af4afaSJohn Baldwin 	mtx_unlock(&psp->ps_mtx);
595611d9407SJohn Baldwin 	PROC_UNLOCK(p);
596d66a5066SPeter Wemm 
597d66a5066SPeter Wemm 	/*
598d66a5066SPeter Wemm 	 * Build the argument list for the signal handler.
599d66a5066SPeter Wemm 	 */
600956d3333SMarcel Moolenaar 	if (p->p_sysent->sv_sigtbl)
601956d3333SMarcel Moolenaar 		if (sig <= p->p_sysent->sv_sigsize)
602956d3333SMarcel Moolenaar 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
603d66a5066SPeter Wemm 
60499d45c5fSMarcel Moolenaar 	bzero(&frame, sizeof(frame));
60599d45c5fSMarcel Moolenaar 
606d66a5066SPeter Wemm 	frame.sf_handler = catcher;
607d66a5066SPeter Wemm 	frame.sf_sig = sig;
608d66a5066SPeter Wemm 
609cc6ca9b3SMarcel Moolenaar 	bsd_to_linux_sigset(mask, &lmask);
610cc6ca9b3SMarcel Moolenaar 
611d66a5066SPeter Wemm 	/*
612d66a5066SPeter Wemm 	 * Build the signal context to be used by sigreturn.
613d66a5066SPeter Wemm 	 */
614cc6ca9b3SMarcel Moolenaar 	frame.sf_sc.sc_mask   = lmask.__bits[0];
6155206bca1SLuoqi Chen 	frame.sf_sc.sc_gs     = rgs();
6165206bca1SLuoqi Chen 	frame.sf_sc.sc_fs     = regs->tf_fs;
617213fdd80SPeter Wemm 	frame.sf_sc.sc_es     = regs->tf_es;
618213fdd80SPeter Wemm 	frame.sf_sc.sc_ds     = regs->tf_ds;
619213fdd80SPeter Wemm 	frame.sf_sc.sc_edi    = regs->tf_edi;
620213fdd80SPeter Wemm 	frame.sf_sc.sc_esi    = regs->tf_esi;
621213fdd80SPeter Wemm 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
622213fdd80SPeter Wemm 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
623bdc37934SDmitry Chagin 	frame.sf_sc.sc_esp    = regs->tf_esp;
624213fdd80SPeter Wemm 	frame.sf_sc.sc_edx    = regs->tf_edx;
625213fdd80SPeter Wemm 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
626213fdd80SPeter Wemm 	frame.sf_sc.sc_eax    = regs->tf_eax;
627213fdd80SPeter Wemm 	frame.sf_sc.sc_eip    = regs->tf_eip;
628213fdd80SPeter Wemm 	frame.sf_sc.sc_cs     = regs->tf_cs;
629213fdd80SPeter Wemm 	frame.sf_sc.sc_eflags = regs->tf_eflags;
630213fdd80SPeter Wemm 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
631213fdd80SPeter Wemm 	frame.sf_sc.sc_ss     = regs->tf_ss;
632213fdd80SPeter Wemm 	frame.sf_sc.sc_err    = regs->tf_err;
63396a2b635SKonstantin Belousov 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
6349104847fSDavid Xu 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
635cc6ca9b3SMarcel Moolenaar 
6362c4ab9ddSAndrew Gallatin 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
637cc6ca9b3SMarcel Moolenaar 		frame.sf_extramask[i] = lmask.__bits[i+1];
638d66a5066SPeter Wemm 
639d66a5066SPeter Wemm 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
640d66a5066SPeter Wemm 		/*
641d66a5066SPeter Wemm 		 * Process has trashed its stack; give it an illegal
642d66a5066SPeter Wemm 		 * instruction to halt it in its tracks.
643d66a5066SPeter Wemm 		 */
64419eb87d2SJohn Baldwin 		PROC_LOCK(p);
645b40ce416SJulian Elischer 		sigexit(td, SIGILL);
646d66a5066SPeter Wemm 	}
647d66a5066SPeter Wemm 
648d66a5066SPeter Wemm 	/*
649d66a5066SPeter Wemm 	 * Build context to run handler in.
650d66a5066SPeter Wemm 	 */
651213fdd80SPeter Wemm 	regs->tf_esp = (int)fp;
652bdc37934SDmitry Chagin 	regs->tf_eip = linux_sigcode;
65322eca0bfSKonstantin Belousov 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
654213fdd80SPeter Wemm 	regs->tf_cs = _ucodesel;
655213fdd80SPeter Wemm 	regs->tf_ds = _udatasel;
656213fdd80SPeter Wemm 	regs->tf_es = _udatasel;
6575206bca1SLuoqi Chen 	regs->tf_fs = _udatasel;
658213fdd80SPeter Wemm 	regs->tf_ss = _udatasel;
6595002a60fSMarcel Moolenaar 	PROC_LOCK(p);
66090af4afaSJohn Baldwin 	mtx_lock(&psp->ps_mtx);
661d66a5066SPeter Wemm }
662d66a5066SPeter Wemm 
663d66a5066SPeter Wemm /*
664d66a5066SPeter Wemm  * System call to cleanup state after a signal
665d66a5066SPeter Wemm  * has been taken.  Reset signal mask and
666d66a5066SPeter Wemm  * stack state from context left by sendsig (above).
667d66a5066SPeter Wemm  * Return to previous pc and psl as specified by
668d66a5066SPeter Wemm  * context left by sendsig. Check carefully to
669d66a5066SPeter Wemm  * make sure that the user has not modified the
670d66a5066SPeter Wemm  * psl to gain improper privileges or to cause
671d66a5066SPeter Wemm  * a machine fault.
672d66a5066SPeter Wemm  */
673d66a5066SPeter Wemm int
674b07cd97eSMark Murray linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
675d66a5066SPeter Wemm {
6765002a60fSMarcel Moolenaar 	struct l_sigframe frame;
6771d062e2bSDag-Erling Smørgrav 	struct trapframe *regs;
6785002a60fSMarcel Moolenaar 	l_sigset_t lmask;
679d6e029adSKonstantin Belousov 	sigset_t bmask;
6802c4ab9ddSAndrew Gallatin 	int eflags, i;
6819104847fSDavid Xu 	ksiginfo_t ksi;
682d66a5066SPeter Wemm 
683b40ce416SJulian Elischer 	regs = td->td_frame;
684d66a5066SPeter Wemm 
685d66a5066SPeter Wemm #ifdef DEBUG
68624593369SJonathan Lemon 	if (ldebug(sigreturn))
68724593369SJonathan Lemon 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
688d66a5066SPeter Wemm #endif
689d66a5066SPeter Wemm 	/*
690cc6ca9b3SMarcel Moolenaar 	 * The trampoline code hands us the sigframe.
691d66a5066SPeter Wemm 	 * It is unsafe to keep track of it ourselves, in the event that a
692d66a5066SPeter Wemm 	 * program jumps out of a signal handler.
693d66a5066SPeter Wemm 	 */
6944b7ef73dSDag-Erling Smørgrav 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
695d66a5066SPeter Wemm 		return (EFAULT);
696d66a5066SPeter Wemm 
697d66a5066SPeter Wemm 	/*
698d66a5066SPeter Wemm 	 * Check for security violations.
699d66a5066SPeter Wemm 	 */
700d66a5066SPeter Wemm #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
701cc6ca9b3SMarcel Moolenaar 	eflags = frame.sf_sc.sc_eflags;
7023d271aaaSEd Maste 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
703d66a5066SPeter Wemm 		return (EINVAL);
704d66a5066SPeter Wemm 
705d66a5066SPeter Wemm 	/*
706d66a5066SPeter Wemm 	 * Don't allow users to load a valid privileged %cs.  Let the
707d66a5066SPeter Wemm 	 * hardware check for invalid selectors, excess privilege in
708d66a5066SPeter Wemm 	 * other selectors, invalid %eip's and invalid %esp's.
709d66a5066SPeter Wemm 	 */
71040d50994SPhilippe Charnier #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
711cc6ca9b3SMarcel Moolenaar 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
7129104847fSDavid Xu 		ksiginfo_init_trap(&ksi);
7139104847fSDavid Xu 		ksi.ksi_signo = SIGBUS;
7149104847fSDavid Xu 		ksi.ksi_code = BUS_OBJERR;
7159104847fSDavid Xu 		ksi.ksi_trapno = T_PROTFLT;
7169104847fSDavid Xu 		ksi.ksi_addr = (void *)regs->tf_eip;
7179104847fSDavid Xu 		trapsignal(td, &ksi);
718d66a5066SPeter Wemm 		return (EINVAL);
719d66a5066SPeter Wemm 	}
720d66a5066SPeter Wemm 
721cc6ca9b3SMarcel Moolenaar 	lmask.__bits[0] = frame.sf_sc.sc_mask;
7222c4ab9ddSAndrew Gallatin 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
723cc6ca9b3SMarcel Moolenaar 		lmask.__bits[i+1] = frame.sf_extramask[i];
724d6e029adSKonstantin Belousov 	linux_to_bsd_sigset(&lmask, &bmask);
725d6e029adSKonstantin Belousov 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
726956d3333SMarcel Moolenaar 
727d66a5066SPeter Wemm 	/*
728d66a5066SPeter Wemm 	 * Restore signal context.
729d66a5066SPeter Wemm 	 */
7305206bca1SLuoqi Chen 	/* %gs was restored by the trampoline. */
731cc6ca9b3SMarcel Moolenaar 	regs->tf_fs     = frame.sf_sc.sc_fs;
732cc6ca9b3SMarcel Moolenaar 	regs->tf_es     = frame.sf_sc.sc_es;
733cc6ca9b3SMarcel Moolenaar 	regs->tf_ds     = frame.sf_sc.sc_ds;
734cc6ca9b3SMarcel Moolenaar 	regs->tf_edi    = frame.sf_sc.sc_edi;
735cc6ca9b3SMarcel Moolenaar 	regs->tf_esi    = frame.sf_sc.sc_esi;
736cc6ca9b3SMarcel Moolenaar 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
737cc6ca9b3SMarcel Moolenaar 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
738cc6ca9b3SMarcel Moolenaar 	regs->tf_edx    = frame.sf_sc.sc_edx;
739cc6ca9b3SMarcel Moolenaar 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
740cc6ca9b3SMarcel Moolenaar 	regs->tf_eax    = frame.sf_sc.sc_eax;
741cc6ca9b3SMarcel Moolenaar 	regs->tf_eip    = frame.sf_sc.sc_eip;
742cc6ca9b3SMarcel Moolenaar 	regs->tf_cs     = frame.sf_sc.sc_cs;
743213fdd80SPeter Wemm 	regs->tf_eflags = eflags;
744cc6ca9b3SMarcel Moolenaar 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
745cc6ca9b3SMarcel Moolenaar 	regs->tf_ss     = frame.sf_sc.sc_ss;
746d66a5066SPeter Wemm 
747d66a5066SPeter Wemm 	return (EJUSTRETURN);
748d66a5066SPeter Wemm }
749d66a5066SPeter Wemm 
75079363394SAndrew Gallatin /*
75179363394SAndrew Gallatin  * System call to cleanup state after a signal
75279363394SAndrew Gallatin  * has been taken.  Reset signal mask and
75379363394SAndrew Gallatin  * stack state from context left by rt_sendsig (above).
75479363394SAndrew Gallatin  * Return to previous pc and psl as specified by
75579363394SAndrew Gallatin  * context left by sendsig. Check carefully to
75679363394SAndrew Gallatin  * make sure that the user has not modified the
75779363394SAndrew Gallatin  * psl to gain improper privileges or to cause
75879363394SAndrew Gallatin  * a machine fault.
75979363394SAndrew Gallatin  */
76079363394SAndrew Gallatin int
761b07cd97eSMark Murray linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
76279363394SAndrew Gallatin {
7635002a60fSMarcel Moolenaar 	struct l_ucontext uc;
7645002a60fSMarcel Moolenaar 	struct l_sigcontext *context;
765d6e029adSKonstantin Belousov 	sigset_t bmask;
7665002a60fSMarcel Moolenaar 	l_stack_t *lss;
767206a5d3aSIan Dowse 	stack_t ss;
7681d062e2bSDag-Erling Smørgrav 	struct trapframe *regs;
76979363394SAndrew Gallatin 	int eflags;
7709104847fSDavid Xu 	ksiginfo_t ksi;
77179363394SAndrew Gallatin 
772b40ce416SJulian Elischer 	regs = td->td_frame;
77379363394SAndrew Gallatin 
77479363394SAndrew Gallatin #ifdef DEBUG
77524593369SJonathan Lemon 	if (ldebug(rt_sigreturn))
77624593369SJonathan Lemon 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
77779363394SAndrew Gallatin #endif
77879363394SAndrew Gallatin 	/*
779cc6ca9b3SMarcel Moolenaar 	 * The trampoline code hands us the ucontext.
78079363394SAndrew Gallatin 	 * It is unsafe to keep track of it ourselves, in the event that a
78179363394SAndrew Gallatin 	 * program jumps out of a signal handler.
78279363394SAndrew Gallatin 	 */
7834b7ef73dSDag-Erling Smørgrav 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
78479363394SAndrew Gallatin 		return (EFAULT);
78579363394SAndrew Gallatin 
78679363394SAndrew Gallatin 	context = &uc.uc_mcontext;
78779363394SAndrew Gallatin 
78879363394SAndrew Gallatin 	/*
78979363394SAndrew Gallatin 	 * Check for security violations.
79079363394SAndrew Gallatin 	 */
79179363394SAndrew Gallatin #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
79279363394SAndrew Gallatin 	eflags = context->sc_eflags;
7933d271aaaSEd Maste 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
79479363394SAndrew Gallatin 		return (EINVAL);
79579363394SAndrew Gallatin 
79679363394SAndrew Gallatin 	/*
79779363394SAndrew Gallatin 	 * Don't allow users to load a valid privileged %cs.  Let the
79879363394SAndrew Gallatin 	 * hardware check for invalid selectors, excess privilege in
79979363394SAndrew Gallatin 	 * other selectors, invalid %eip's and invalid %esp's.
80079363394SAndrew Gallatin 	 */
80179363394SAndrew Gallatin #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
80279363394SAndrew Gallatin 	if (!CS_SECURE(context->sc_cs)) {
8039104847fSDavid Xu 		ksiginfo_init_trap(&ksi);
8049104847fSDavid Xu 		ksi.ksi_signo = SIGBUS;
8059104847fSDavid Xu 		ksi.ksi_code = BUS_OBJERR;
8069104847fSDavid Xu 		ksi.ksi_trapno = T_PROTFLT;
8079104847fSDavid Xu 		ksi.ksi_addr = (void *)regs->tf_eip;
8089104847fSDavid Xu 		trapsignal(td, &ksi);
80979363394SAndrew Gallatin 		return (EINVAL);
81079363394SAndrew Gallatin 	}
81179363394SAndrew Gallatin 
812d6e029adSKonstantin Belousov 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
813d6e029adSKonstantin Belousov 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
81479363394SAndrew Gallatin 
81579363394SAndrew Gallatin 	/*
816cc6ca9b3SMarcel Moolenaar 	 * Restore signal context
81779363394SAndrew Gallatin 	 */
81879363394SAndrew Gallatin 	/* %gs was restored by the trampoline. */
81979363394SAndrew Gallatin 	regs->tf_fs     = context->sc_fs;
82079363394SAndrew Gallatin 	regs->tf_es     = context->sc_es;
82179363394SAndrew Gallatin 	regs->tf_ds     = context->sc_ds;
82279363394SAndrew Gallatin 	regs->tf_edi    = context->sc_edi;
82379363394SAndrew Gallatin 	regs->tf_esi    = context->sc_esi;
82479363394SAndrew Gallatin 	regs->tf_ebp    = context->sc_ebp;
82579363394SAndrew Gallatin 	regs->tf_ebx    = context->sc_ebx;
82679363394SAndrew Gallatin 	regs->tf_edx    = context->sc_edx;
82779363394SAndrew Gallatin 	regs->tf_ecx    = context->sc_ecx;
82879363394SAndrew Gallatin 	regs->tf_eax    = context->sc_eax;
82979363394SAndrew Gallatin 	regs->tf_eip    = context->sc_eip;
83079363394SAndrew Gallatin 	regs->tf_cs     = context->sc_cs;
83179363394SAndrew Gallatin 	regs->tf_eflags = eflags;
83279363394SAndrew Gallatin 	regs->tf_esp    = context->sc_esp_at_signal;
83379363394SAndrew Gallatin 	regs->tf_ss     = context->sc_ss;
83479363394SAndrew Gallatin 
83579363394SAndrew Gallatin 	/*
83679363394SAndrew Gallatin 	 * call sigaltstack & ignore results..
83779363394SAndrew Gallatin 	 */
83879363394SAndrew Gallatin 	lss = &uc.uc_stack;
839206a5d3aSIan Dowse 	ss.ss_sp = lss->ss_sp;
840206a5d3aSIan Dowse 	ss.ss_size = lss->ss_size;
841206a5d3aSIan Dowse 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
84279363394SAndrew Gallatin 
84379363394SAndrew Gallatin #ifdef DEBUG
84424593369SJonathan Lemon 	if (ldebug(rt_sigreturn))
84524593369SJonathan Lemon 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
846206a5d3aSIan Dowse 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
84779363394SAndrew Gallatin #endif
848206a5d3aSIan Dowse 	(void)kern_sigaltstack(td, &ss, NULL);
84979363394SAndrew Gallatin 
85079363394SAndrew Gallatin 	return (EJUSTRETURN);
85179363394SAndrew Gallatin }
85279363394SAndrew Gallatin 
853afe1a688SKonstantin Belousov static int
854afe1a688SKonstantin Belousov linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
855d66a5066SPeter Wemm {
856afe1a688SKonstantin Belousov 	struct proc *p;
857afe1a688SKonstantin Belousov 	struct trapframe *frame;
858afe1a688SKonstantin Belousov 
859afe1a688SKonstantin Belousov 	p = td->td_proc;
860afe1a688SKonstantin Belousov 	frame = td->td_frame;
861afe1a688SKonstantin Belousov 
862afe1a688SKonstantin Belousov 	sa->code = frame->tf_eax;
863afe1a688SKonstantin Belousov 	sa->args[0] = frame->tf_ebx;
864afe1a688SKonstantin Belousov 	sa->args[1] = frame->tf_ecx;
865afe1a688SKonstantin Belousov 	sa->args[2] = frame->tf_edx;
866afe1a688SKonstantin Belousov 	sa->args[3] = frame->tf_esi;
867afe1a688SKonstantin Belousov 	sa->args[4] = frame->tf_edi;
868afe1a688SKonstantin Belousov 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
869afe1a688SKonstantin Belousov 
870afe1a688SKonstantin Belousov 	if (sa->code >= p->p_sysent->sv_size)
871afe1a688SKonstantin Belousov 		sa->callp = &p->p_sysent->sv_table[0];
872afe1a688SKonstantin Belousov  	else
873afe1a688SKonstantin Belousov  		sa->callp = &p->p_sysent->sv_table[sa->code];
874afe1a688SKonstantin Belousov 	sa->narg = sa->callp->sy_narg;
875afe1a688SKonstantin Belousov 
876afe1a688SKonstantin Belousov 	td->td_retval[0] = 0;
877afe1a688SKonstantin Belousov 	td->td_retval[1] = frame->tf_edx;
878afe1a688SKonstantin Belousov 
879afe1a688SKonstantin Belousov 	return (0);
880d66a5066SPeter Wemm }
881d66a5066SPeter Wemm 
882d323ddf3SMatthew Dillon /*
883d323ddf3SMatthew Dillon  * If a linux binary is exec'ing something, try this image activator
884d323ddf3SMatthew Dillon  * first.  We override standard shell script execution in order to
885d323ddf3SMatthew Dillon  * be able to modify the interpreter path.  We only do this if a linux
886d323ddf3SMatthew Dillon  * binary is doing the exec, so we do not create an EXEC module for it.
887d323ddf3SMatthew Dillon  */
88889c9a483SAlfred Perlstein static int	exec_linux_imgact_try(struct image_params *iparams);
889d323ddf3SMatthew Dillon 
890d323ddf3SMatthew Dillon static int
891b07cd97eSMark Murray exec_linux_imgact_try(struct image_params *imgp)
892d323ddf3SMatthew Dillon {
893d323ddf3SMatthew Dillon     const char *head = (const char *)imgp->image_header;
8940311233eSJohn Baldwin     char *rpath;
895a14a9498SAlan Cox     int error = -1;
896d323ddf3SMatthew Dillon 
897d323ddf3SMatthew Dillon     /*
898d323ddf3SMatthew Dillon      * The interpreter for shell scripts run from a linux binary needs
899d323ddf3SMatthew Dillon      * to be located in /compat/linux if possible in order to recursively
900d323ddf3SMatthew Dillon      * maintain linux path emulation.
901d323ddf3SMatthew Dillon      */
902d323ddf3SMatthew Dillon     if (((const short *)head)[0] == SHELLMAGIC) {
903d323ddf3SMatthew Dillon 	    /*
904d323ddf3SMatthew Dillon 	     * Run our normal shell image activator.  If it succeeds attempt
905d323ddf3SMatthew Dillon 	     * to use the alternate path for the interpreter.  If an alternate
906d323ddf3SMatthew Dillon 	     * path is found, use our stringspace to store it.
907d323ddf3SMatthew Dillon 	     */
908d323ddf3SMatthew Dillon 	    if ((error = exec_shell_imgact(imgp)) == 0) {
9090311233eSJohn Baldwin 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
91048b05c3fSKonstantin Belousov 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
911a14a9498SAlan Cox 		    if (rpath != NULL)
912a14a9498SAlan Cox 			    imgp->args->fname_buf =
913a14a9498SAlan Cox 				imgp->interpreter_name = rpath;
914d323ddf3SMatthew Dillon 	    }
915d323ddf3SMatthew Dillon     }
916d323ddf3SMatthew Dillon     return (error);
917d323ddf3SMatthew Dillon }
918d323ddf3SMatthew Dillon 
919598d45beSMatthew N. Dodd /*
920598d45beSMatthew N. Dodd  * exec_setregs may initialize some registers differently than Linux
921598d45beSMatthew N. Dodd  * does, thus potentially confusing Linux binaries. If necessary, we
922598d45beSMatthew N. Dodd  * override the exec_setregs default(s) here.
923598d45beSMatthew N. Dodd  */
924598d45beSMatthew N. Dodd static void
925a107d8aaSNathan Whitehorn exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
926598d45beSMatthew N. Dodd {
927598d45beSMatthew N. Dodd 	struct pcb *pcb = td->td_pcb;
928598d45beSMatthew N. Dodd 
929a107d8aaSNathan Whitehorn 	exec_setregs(td, imgp, stack);
930598d45beSMatthew N. Dodd 
931598d45beSMatthew N. Dodd 	/* Linux sets %gs to 0, we default to _udatasel */
9322ee8325fSJohn Baldwin 	pcb->pcb_gs = 0;
9332ee8325fSJohn Baldwin 	load_gs(0);
9342a51b9b0SDavid Schultz 
9352ee8325fSJohn Baldwin 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
936598d45beSMatthew N. Dodd }
937598d45beSMatthew N. Dodd 
9384d7c2e8aSDmitry Chagin static void
9394d7c2e8aSDmitry Chagin linux_get_machine(const char **dst)
9404d7c2e8aSDmitry Chagin {
9414d7c2e8aSDmitry Chagin 
9424d7c2e8aSDmitry Chagin 	switch (cpu_class) {
9434d7c2e8aSDmitry Chagin 	case CPUCLASS_686:
9444d7c2e8aSDmitry Chagin 		*dst = "i686";
9454d7c2e8aSDmitry Chagin 		break;
9464d7c2e8aSDmitry Chagin 	case CPUCLASS_586:
9474d7c2e8aSDmitry Chagin 		*dst = "i586";
9484d7c2e8aSDmitry Chagin 		break;
9494d7c2e8aSDmitry Chagin 	case CPUCLASS_486:
9504d7c2e8aSDmitry Chagin 		*dst = "i486";
9514d7c2e8aSDmitry Chagin 		break;
9524d7c2e8aSDmitry Chagin 	default:
9534d7c2e8aSDmitry Chagin 		*dst = "i386";
9544d7c2e8aSDmitry Chagin 	}
9554d7c2e8aSDmitry Chagin }
9564d7c2e8aSDmitry Chagin 
957d66a5066SPeter Wemm struct sysentvec linux_sysvec = {
958a8d403e1SKonstantin Belousov 	.sv_size	= LINUX_SYS_MAXSYSCALL,
959a8d403e1SKonstantin Belousov 	.sv_table	= linux_sysent,
960a8d403e1SKonstantin Belousov 	.sv_mask	= 0,
961a8d403e1SKonstantin Belousov 	.sv_sigsize	= LINUX_SIGTBLSZ,
962a8d403e1SKonstantin Belousov 	.sv_sigtbl	= bsd_to_linux_signal,
963a8d403e1SKonstantin Belousov 	.sv_errsize	= ELAST + 1,
964a8d403e1SKonstantin Belousov 	.sv_errtbl	= bsd_to_linux_errno,
965a8d403e1SKonstantin Belousov 	.sv_transtrap	= translate_traps,
966a8d403e1SKonstantin Belousov 	.sv_fixup	= linux_fixup,
967a8d403e1SKonstantin Belousov 	.sv_sendsig	= linux_sendsig,
968bdc37934SDmitry Chagin 	.sv_sigcode	= &_binary_linux_locore_o_start,
969a8d403e1SKonstantin Belousov 	.sv_szsigcode	= &linux_szsigcode,
970afe1a688SKonstantin Belousov 	.sv_prepsyscall	= NULL,
971a8d403e1SKonstantin Belousov 	.sv_name	= "Linux a.out",
972a8d403e1SKonstantin Belousov 	.sv_coredump	= NULL,
973a8d403e1SKonstantin Belousov 	.sv_imgact_try	= exec_linux_imgact_try,
974a8d403e1SKonstantin Belousov 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
975a8d403e1SKonstantin Belousov 	.sv_pagesize	= PAGE_SIZE,
976a8d403e1SKonstantin Belousov 	.sv_minuser	= VM_MIN_ADDRESS,
977a8d403e1SKonstantin Belousov 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
9788f1e49a6SDmitry Chagin 	.sv_usrstack	= LINUX_USRSTACK,
979a8d403e1SKonstantin Belousov 	.sv_psstrings	= PS_STRINGS,
980a8d403e1SKonstantin Belousov 	.sv_stackprot	= VM_PROT_ALL,
981a8d403e1SKonstantin Belousov 	.sv_copyout_strings = exec_copyout_strings,
982a8d403e1SKonstantin Belousov 	.sv_setregs	= exec_linux_setregs,
983a8d403e1SKonstantin Belousov 	.sv_fixlimit	= NULL,
984b4cf0e62SKonstantin Belousov 	.sv_maxssiz	= NULL,
985afe1a688SKonstantin Belousov 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
986afe1a688SKonstantin Belousov 	.sv_set_syscall_retval = cpu_set_syscall_retval,
987afe1a688SKonstantin Belousov 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
988afe1a688SKonstantin Belousov 	.sv_syscallnames = NULL,
9898f1e49a6SDmitry Chagin 	.sv_shared_page_base = LINUX_SHAREDPAGE,
9908f1e49a6SDmitry Chagin 	.sv_shared_page_len = PAGE_SIZE,
991e5d81ef1SDmitry Chagin 	.sv_schedtail	= linux_schedtail,
99281338031SDmitry Chagin 	.sv_thread_detach = linux_thread_detach,
993d66a5066SPeter Wemm };
9948f1e49a6SDmitry Chagin INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
995e1743d02SSøren Schmidt 
996e1743d02SSøren Schmidt struct sysentvec elf_linux_sysvec = {
997a8d403e1SKonstantin Belousov 	.sv_size	= LINUX_SYS_MAXSYSCALL,
998a8d403e1SKonstantin Belousov 	.sv_table	= linux_sysent,
999a8d403e1SKonstantin Belousov 	.sv_mask	= 0,
1000a8d403e1SKonstantin Belousov 	.sv_sigsize	= LINUX_SIGTBLSZ,
1001a8d403e1SKonstantin Belousov 	.sv_sigtbl	= bsd_to_linux_signal,
1002a8d403e1SKonstantin Belousov 	.sv_errsize	= ELAST + 1,
1003a8d403e1SKonstantin Belousov 	.sv_errtbl	= bsd_to_linux_errno,
1004a8d403e1SKonstantin Belousov 	.sv_transtrap	= translate_traps,
1005a8d403e1SKonstantin Belousov 	.sv_fixup	= elf_linux_fixup,
1006a8d403e1SKonstantin Belousov 	.sv_sendsig	= linux_sendsig,
1007bdc37934SDmitry Chagin 	.sv_sigcode	= &_binary_linux_locore_o_start,
1008a8d403e1SKonstantin Belousov 	.sv_szsigcode	= &linux_szsigcode,
1009afe1a688SKonstantin Belousov 	.sv_prepsyscall	= NULL,
1010a8d403e1SKonstantin Belousov 	.sv_name	= "Linux ELF",
1011a8d403e1SKonstantin Belousov 	.sv_coredump	= elf32_coredump,
1012a8d403e1SKonstantin Belousov 	.sv_imgact_try	= exec_linux_imgact_try,
1013a8d403e1SKonstantin Belousov 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1014a8d403e1SKonstantin Belousov 	.sv_pagesize	= PAGE_SIZE,
1015a8d403e1SKonstantin Belousov 	.sv_minuser	= VM_MIN_ADDRESS,
1016a8d403e1SKonstantin Belousov 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
10178f1e49a6SDmitry Chagin 	.sv_usrstack	= LINUX_USRSTACK,
10188f1e49a6SDmitry Chagin 	.sv_psstrings	= LINUX_PS_STRINGS,
1019a8d403e1SKonstantin Belousov 	.sv_stackprot	= VM_PROT_ALL,
10204d7c2e8aSDmitry Chagin 	.sv_copyout_strings = linux_copyout_strings,
1021a8d403e1SKonstantin Belousov 	.sv_setregs	= exec_linux_setregs,
1022a8d403e1SKonstantin Belousov 	.sv_fixlimit	= NULL,
1023b4cf0e62SKonstantin Belousov 	.sv_maxssiz	= NULL,
10248f1e49a6SDmitry Chagin 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1025afe1a688SKonstantin Belousov 	.sv_set_syscall_retval = cpu_set_syscall_retval,
1026afe1a688SKonstantin Belousov 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1027afe1a688SKonstantin Belousov 	.sv_syscallnames = NULL,
10288f1e49a6SDmitry Chagin 	.sv_shared_page_base = LINUX_SHAREDPAGE,
10298f1e49a6SDmitry Chagin 	.sv_shared_page_len = PAGE_SIZE,
1030e5d81ef1SDmitry Chagin 	.sv_schedtail	= linux_schedtail,
103181338031SDmitry Chagin 	.sv_thread_detach = linux_thread_detach,
1032e1743d02SSøren Schmidt };
1033bdc37934SDmitry Chagin 
1034bdc37934SDmitry Chagin static void
1035bdc37934SDmitry Chagin linux_vdso_install(void *param)
1036bdc37934SDmitry Chagin {
1037bdc37934SDmitry Chagin 
1038bdc37934SDmitry Chagin 	linux_szsigcode = (&_binary_linux_locore_o_end -
1039bdc37934SDmitry Chagin 	    &_binary_linux_locore_o_start);
1040bdc37934SDmitry Chagin 
1041bdc37934SDmitry Chagin 	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
1042bdc37934SDmitry Chagin 		panic("Linux invalid vdso size\n");
1043bdc37934SDmitry Chagin 
1044bdc37934SDmitry Chagin 	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
1045bdc37934SDmitry Chagin 
1046bdc37934SDmitry Chagin 	linux_shared_page_obj = __elfN(linux_shared_page_init)
1047bdc37934SDmitry Chagin 	    (&linux_shared_page_mapping);
1048bdc37934SDmitry Chagin 
1049bdc37934SDmitry Chagin 	__elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX_SHAREDPAGE);
1050bdc37934SDmitry Chagin 
1051bdc37934SDmitry Chagin 	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1052bdc37934SDmitry Chagin 	    linux_szsigcode);
1053bdc37934SDmitry Chagin 	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1054bdc37934SDmitry Chagin }
1055bdc37934SDmitry Chagin SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1056bdc37934SDmitry Chagin     (sysinit_cfunc_t)linux_vdso_install, NULL);
1057bdc37934SDmitry Chagin 
1058bdc37934SDmitry Chagin static void
1059bdc37934SDmitry Chagin linux_vdso_deinstall(void *param)
1060bdc37934SDmitry Chagin {
1061bdc37934SDmitry Chagin 
1062bdc37934SDmitry Chagin 	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
1063bdc37934SDmitry Chagin };
1064bdc37934SDmitry Chagin SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1065bdc37934SDmitry Chagin     (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1066e1743d02SSøren Schmidt 
106789ffc202SBjoern A. Zeeb static char GNU_ABI_VENDOR[] = "GNU";
106889ffc202SBjoern A. Zeeb static int GNULINUX_ABI_DESC = 0;
106989ffc202SBjoern A. Zeeb 
107089ffc202SBjoern A. Zeeb static boolean_t
107189ffc202SBjoern A. Zeeb linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
107289ffc202SBjoern A. Zeeb {
107389ffc202SBjoern A. Zeeb 	const Elf32_Word *desc;
107489ffc202SBjoern A. Zeeb 	uintptr_t p;
107589ffc202SBjoern A. Zeeb 
107689ffc202SBjoern A. Zeeb 	p = (uintptr_t)(note + 1);
107789ffc202SBjoern A. Zeeb 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
107889ffc202SBjoern A. Zeeb 
107989ffc202SBjoern A. Zeeb 	desc = (const Elf32_Word *)p;
108089ffc202SBjoern A. Zeeb 	if (desc[0] != GNULINUX_ABI_DESC)
108189ffc202SBjoern A. Zeeb 		return (FALSE);
108289ffc202SBjoern A. Zeeb 
108389ffc202SBjoern A. Zeeb 	/*
108489ffc202SBjoern A. Zeeb 	 * For linux we encode osrel as follows (see linux_mib.c):
108589ffc202SBjoern A. Zeeb 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
108689ffc202SBjoern A. Zeeb 	 */
108789ffc202SBjoern A. Zeeb 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
108889ffc202SBjoern A. Zeeb 
108989ffc202SBjoern A. Zeeb 	return (TRUE);
109089ffc202SBjoern A. Zeeb }
109132c01de2SDmitry Chagin 
109232c01de2SDmitry Chagin static Elf_Brandnote linux_brandnote = {
109389ffc202SBjoern A. Zeeb 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
109489ffc202SBjoern A. Zeeb 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
109532c01de2SDmitry Chagin 	.hdr.n_type	= 1,
109689ffc202SBjoern A. Zeeb 	.vendor		= GNU_ABI_VENDOR,
109789ffc202SBjoern A. Zeeb 	.flags		= BN_TRANSLATE_OSREL,
109889ffc202SBjoern A. Zeeb 	.trans_osrel	= linux_trans_osrel
109932c01de2SDmitry Chagin };
110032c01de2SDmitry Chagin 
1101514058dcSAlexander Langer static Elf32_Brandinfo linux_brand = {
1102a8d403e1SKonstantin Belousov 	.brand		= ELFOSABI_LINUX,
1103a8d403e1SKonstantin Belousov 	.machine	= EM_386,
1104a8d403e1SKonstantin Belousov 	.compat_3_brand	= "Linux",
1105a8d403e1SKonstantin Belousov 	.emul_path	= "/compat/linux",
1106a8d403e1SKonstantin Belousov 	.interp_path	= "/lib/ld-linux.so.1",
1107a8d403e1SKonstantin Belousov 	.sysvec		= &elf_linux_sysvec,
1108a8d403e1SKonstantin Belousov 	.interp_newpath	= NULL,
110932c01de2SDmitry Chagin 	.brand_note	= &linux_brandnote,
11102dedc128SDmitry Chagin 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
11115cf588ebSPeter Wemm };
11125cf588ebSPeter Wemm 
1113514058dcSAlexander Langer static Elf32_Brandinfo linux_glibc2brand = {
1114a8d403e1SKonstantin Belousov 	.brand		= ELFOSABI_LINUX,
1115a8d403e1SKonstantin Belousov 	.machine	= EM_386,
1116a8d403e1SKonstantin Belousov 	.compat_3_brand	= "Linux",
1117a8d403e1SKonstantin Belousov 	.emul_path	= "/compat/linux",
1118a8d403e1SKonstantin Belousov 	.interp_path	= "/lib/ld-linux.so.2",
1119a8d403e1SKonstantin Belousov 	.sysvec		= &elf_linux_sysvec,
1120a8d403e1SKonstantin Belousov 	.interp_newpath	= NULL,
112132c01de2SDmitry Chagin 	.brand_note	= &linux_brandnote,
11222dedc128SDmitry Chagin 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
11234e138a28SMike Smith };
11244e138a28SMike Smith 
1125514058dcSAlexander Langer Elf32_Brandinfo *linux_brandlist[] = {
1126514058dcSAlexander Langer 	&linux_brand,
1127514058dcSAlexander Langer 	&linux_glibc2brand,
1128514058dcSAlexander Langer 	NULL
1129514058dcSAlexander Langer };
1130514058dcSAlexander Langer 
1131aa855a59SPeter Wemm static int
1132c25ded31SBruce Evans linux_elf_modevent(module_t mod, int type, void *data)
1133d30ea4f5SPeter Wemm {
1134514058dcSAlexander Langer 	Elf32_Brandinfo **brandinfo;
1135514058dcSAlexander Langer 	int error;
1136f41325dbSPeter Wemm 	struct linux_ioctl_handler **lihp;
1137514058dcSAlexander Langer 
1138514058dcSAlexander Langer 	error = 0;
1139514058dcSAlexander Langer 
1140aa855a59SPeter Wemm 	switch(type) {
1141aa855a59SPeter Wemm 	case MOD_LOAD:
1142aa855a59SPeter Wemm 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1143aa855a59SPeter Wemm 		     ++brandinfo)
11443ebc1248SPeter Wemm 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1145aa855a59SPeter Wemm 				error = EINVAL;
1146466b14d7SMarcel Moolenaar 		if (error == 0) {
1147f41325dbSPeter Wemm 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1148f41325dbSPeter Wemm 				linux_ioctl_register_handler(*lihp);
11499b44bfc5SAlexander Leidinger 			LIST_INIT(&futex_list);
115079262bf1SDmitry Chagin 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
11519b44bfc5SAlexander Leidinger 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
11529b44bfc5SAlexander Leidinger 			      NULL, 1000);
11539b44bfc5SAlexander Leidinger 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
11549b44bfc5SAlexander Leidinger 			      NULL, 1000);
115581338031SDmitry Chagin 			linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
115681338031SDmitry Chagin 			    linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
11570020bdf1SDmitry Chagin 			linux_get_machine(&linux_kplatform);
11580020bdf1SDmitry Chagin 			linux_szplatform = roundup(strlen(linux_kplatform) + 1,
11594d7c2e8aSDmitry Chagin 			    sizeof(char *));
11607ae27ff4SJamie Gritton 			linux_osd_jail_register();
11611ca16454SDmitry Chagin 			stclohz = (stathz ? stathz : hz);
116243bef515SMarcel Moolenaar 			if (bootverbose)
1163466b14d7SMarcel Moolenaar 				printf("Linux ELF exec handler installed\n");
1164466b14d7SMarcel Moolenaar 		} else
1165466b14d7SMarcel Moolenaar 			printf("cannot insert Linux ELF brand handler\n");
1166aa855a59SPeter Wemm 		break;
1167aa855a59SPeter Wemm 	case MOD_UNLOAD:
1168aa855a59SPeter Wemm 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1169aa855a59SPeter Wemm 		     ++brandinfo)
11703ebc1248SPeter Wemm 			if (elf32_brand_inuse(*brandinfo))
1171d2758342SMark Newton 				error = EBUSY;
1172d2758342SMark Newton 		if (error == 0) {
1173d2758342SMark Newton 			for (brandinfo = &linux_brandlist[0];
1174d2758342SMark Newton 			     *brandinfo != NULL; ++brandinfo)
11753ebc1248SPeter Wemm 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1176aa855a59SPeter Wemm 					error = EINVAL;
1177d2758342SMark Newton 		}
1178466b14d7SMarcel Moolenaar 		if (error == 0) {
1179f41325dbSPeter Wemm 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1180f41325dbSPeter Wemm 				linux_ioctl_unregister_handler(*lihp);
118179262bf1SDmitry Chagin 			mtx_destroy(&futex_mtx);
11829b44bfc5SAlexander Leidinger 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
11839b44bfc5SAlexander Leidinger 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
118481338031SDmitry Chagin 			EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
11857ae27ff4SJamie Gritton 			linux_osd_jail_deregister();
1186466b14d7SMarcel Moolenaar 			if (bootverbose)
1187466b14d7SMarcel Moolenaar 				printf("Linux ELF exec handler removed\n");
1188466b14d7SMarcel Moolenaar 		} else
1189aa855a59SPeter Wemm 			printf("Could not deinstall ELF interpreter entry\n");
1190aa855a59SPeter Wemm 		break;
1191aa855a59SPeter Wemm 	default:
1192af682d48SDmitry Chagin 		return (EOPNOTSUPP);
1193d30ea4f5SPeter Wemm 	}
1194af682d48SDmitry Chagin 	return (error);
1195aa855a59SPeter Wemm }
1196466b14d7SMarcel Moolenaar 
1197aa855a59SPeter Wemm static moduledata_t linux_elf_mod = {
1198aa855a59SPeter Wemm 	"linuxelf",
1199aa855a59SPeter Wemm 	linux_elf_modevent,
12009823d527SKevin Lo 	0
1201aa855a59SPeter Wemm };
1202466b14d7SMarcel Moolenaar 
120378ae4338SKonstantin Belousov DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1204