xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision d6e029adbef6c202441fc41b455610b4cc906ca5)
1d66a5066SPeter Wemm /*-
2e1743d02SSøren Schmidt  * Copyright (c) 1994-1996 Søren Schmidt
3d66a5066SPeter Wemm  * All rights reserved.
4d66a5066SPeter Wemm  *
5d66a5066SPeter Wemm  * Redistribution and use in source and binary forms, with or without
6d66a5066SPeter Wemm  * modification, are permitted provided that the following conditions
7d66a5066SPeter Wemm  * are met:
8d66a5066SPeter Wemm  * 1. Redistributions of source code must retain the above copyright
9d66a5066SPeter Wemm  *    notice, this list of conditions and the following disclaimer
10d66a5066SPeter Wemm  *    in this position and unchanged.
11d66a5066SPeter Wemm  * 2. Redistributions in binary form must reproduce the above copyright
12d66a5066SPeter Wemm  *    notice, this list of conditions and the following disclaimer in the
13d66a5066SPeter Wemm  *    documentation and/or other materials provided with the distribution.
14d66a5066SPeter Wemm  * 3. The name of the author may not be used to endorse or promote products
1521dc7d4fSJens Schweikhardt  *    derived from this software without specific prior written permission
16d66a5066SPeter Wemm  *
17d66a5066SPeter Wemm  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18d66a5066SPeter Wemm  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19d66a5066SPeter Wemm  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20d66a5066SPeter Wemm  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21d66a5066SPeter Wemm  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22d66a5066SPeter Wemm  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23d66a5066SPeter Wemm  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24d66a5066SPeter Wemm  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25d66a5066SPeter Wemm  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26d66a5066SPeter Wemm  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27d66a5066SPeter Wemm  */
28d66a5066SPeter Wemm 
2927e0099cSDavid E. O'Brien #include <sys/cdefs.h>
3027e0099cSDavid E. O'Brien __FBSDID("$FreeBSD$");
3127e0099cSDavid E. O'Brien 
32d66a5066SPeter Wemm #include <sys/param.h>
3375f83872SPeter Wemm #include <sys/systm.h>
34ff22c670SBruce Evans #include <sys/exec.h>
3557b4252eSKonstantin Belousov #include <sys/fcntl.h>
36d66a5066SPeter Wemm #include <sys/imgact.h>
3722d4b0fbSJohn Polstra #include <sys/imgact_aout.h>
38e1743d02SSøren Schmidt #include <sys/imgact_elf.h>
39ff22c670SBruce Evans #include <sys/kernel.h>
407106ca0dSJohn Baldwin #include <sys/lock.h>
41e1743d02SSøren Schmidt #include <sys/malloc.h>
42ff22c670SBruce Evans #include <sys/module.h>
4323955314SAlfred Perlstein #include <sys/mutex.h>
44fb919e4dSMark Murray #include <sys/proc.h>
45fb919e4dSMark Murray #include <sys/signalvar.h>
46206a5d3aSIan Dowse #include <sys/syscallsubr.h>
47fb919e4dSMark Murray #include <sys/sysent.h>
48fb919e4dSMark Murray #include <sys/sysproto.h>
49a9148ab1SPeter Wemm #include <sys/vnode.h>
509b44bfc5SAlexander Leidinger #include <sys/eventhandler.h>
51fb919e4dSMark Murray 
52d66a5066SPeter Wemm #include <vm/vm.h>
53a9148ab1SPeter Wemm #include <vm/pmap.h>
54ff22c670SBruce Evans #include <vm/vm_extern.h>
55a9148ab1SPeter Wemm #include <vm/vm_map.h>
56a9148ab1SPeter Wemm #include <vm/vm_object.h>
57ff22c670SBruce Evans #include <vm/vm_page.h>
58ff22c670SBruce Evans #include <vm/vm_param.h>
59ff22c670SBruce Evans 
60ff22c670SBruce Evans #include <machine/cpu.h>
614d7c2e8aSDmitry Chagin #include <machine/cputypes.h>
62ff22c670SBruce Evans #include <machine/md_var.h>
63d3adf769SDavid Schultz #include <machine/pcb.h>
64a9148ab1SPeter Wemm 
65d66a5066SPeter Wemm #include <i386/linux/linux.h>
66ebea8660SMarcel Moolenaar #include <i386/linux/linux_proto.h>
67d789bfd5SDmitry Chagin #include <compat/linux/linux_futex.h>
6894cb2ecfSAlexander Leidinger #include <compat/linux/linux_emul.h>
690f9d6538SJohn Baldwin #include <compat/linux/linux_mib.h>
704d7c2e8aSDmitry Chagin #include <compat/linux/linux_misc.h>
71b595ab37SAndrew Gallatin #include <compat/linux/linux_signal.h>
72322bfdc3SMarcel Moolenaar #include <compat/linux/linux_util.h>
73e1743d02SSøren Schmidt 
741d91482dSPeter Wemm MODULE_VERSION(linux, 1);
751d91482dSPeter Wemm 
7643bef515SMarcel Moolenaar MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
7743bef515SMarcel Moolenaar 
78d323ddf3SMatthew Dillon #if BYTE_ORDER == LITTLE_ENDIAN
79d323ddf3SMatthew Dillon #define SHELLMAGIC      0x2123 /* #! */
80d323ddf3SMatthew Dillon #else
81d323ddf3SMatthew Dillon #define SHELLMAGIC      0x2321
82d323ddf3SMatthew Dillon #endif
83d323ddf3SMatthew Dillon 
/*
 * The sendsig functions are not syscalls, yet they want to use the
 * ldebug() facility.  Map both of them to syscall 0 for that purpose;
 * this is slightly less bogus than using ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_sendsig		0
#define	LINUX_SYS_linux_rt_sendsig	0
92e061a6caSMarcel Moolenaar 
9343bef515SMarcel Moolenaar extern char linux_sigcode[];
9443bef515SMarcel Moolenaar extern int linux_szsigcode;
9543bef515SMarcel Moolenaar 
9643bef515SMarcel Moolenaar extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
9743bef515SMarcel Moolenaar 
98f41325dbSPeter Wemm SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99060e4882SDoug Ambrisko SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
10043bef515SMarcel Moolenaar 
10189c9a483SAlfred Perlstein static int	linux_fixup(register_t **stack_base,
10289c9a483SAlfred Perlstein 		    struct image_params *iparams);
10389c9a483SAlfred Perlstein static int	elf_linux_fixup(register_t **stack_base,
10489c9a483SAlfred Perlstein 		    struct image_params *iparams);
105bda2a3afSBruce Evans static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
106bda2a3afSBruce Evans 		    caddr_t *params);
1079104847fSDavid Xu static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
108598d45beSMatthew N. Dodd static void	exec_linux_setregs(struct thread *td, u_long entry,
109598d45beSMatthew N. Dodd 				   u_long stack, u_long ps_strings);
1104d7c2e8aSDmitry Chagin static register_t *linux_copyout_strings(struct image_params *imgp);
11189ffc202SBjoern A. Zeeb static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
1124d7c2e8aSDmitry Chagin 
1134d7c2e8aSDmitry Chagin static int linux_szplatform;
1144d7c2e8aSDmitry Chagin const char *linux_platform;
115d66a5066SPeter Wemm 
1169b44bfc5SAlexander Leidinger static eventhandler_tag linux_exit_tag;
1179b44bfc5SAlexander Leidinger static eventhandler_tag linux_schedtail_tag;
1189b44bfc5SAlexander Leidinger static eventhandler_tag linux_exec_tag;
1199b44bfc5SAlexander Leidinger 
120d66a5066SPeter Wemm /*
121d66a5066SPeter Wemm  * Linux syscalls return negative errno's, we do positive and map them
12250e422f0SAlexander Leidinger  * Reference:
12350e422f0SAlexander Leidinger  *   FreeBSD: src/sys/sys/errno.h
12450e422f0SAlexander Leidinger  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
12550e422f0SAlexander Leidinger  *            linux-2.6.17.8/include/asm-generic/errno.h
126d66a5066SPeter Wemm  */
12785f118c8SDmitrij Tejblum static int bsd_to_linux_errno[ELAST + 1] = {
128d66a5066SPeter Wemm 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
129d66a5066SPeter Wemm 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
130d66a5066SPeter Wemm 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
131d66a5066SPeter Wemm 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
132d66a5066SPeter Wemm 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
133d66a5066SPeter Wemm 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
134d66a5066SPeter Wemm 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
135d66a5066SPeter Wemm 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
13650e422f0SAlexander Leidinger 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
13750e422f0SAlexander Leidinger 	 -72, -67, -71
138d66a5066SPeter Wemm };
139d66a5066SPeter Wemm 
140956d3333SMarcel Moolenaar int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
141956d3333SMarcel Moolenaar 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
142956d3333SMarcel Moolenaar 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
143ba873f4cSAlexander Kabaev 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
144956d3333SMarcel Moolenaar 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
145956d3333SMarcel Moolenaar 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
146956d3333SMarcel Moolenaar 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
147956d3333SMarcel Moolenaar 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
148956d3333SMarcel Moolenaar 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
149d66a5066SPeter Wemm };
150d66a5066SPeter Wemm 
151956d3333SMarcel Moolenaar int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
152956d3333SMarcel Moolenaar 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
153956d3333SMarcel Moolenaar 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
154956d3333SMarcel Moolenaar 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
155956d3333SMarcel Moolenaar 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
156956d3333SMarcel Moolenaar 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
157956d3333SMarcel Moolenaar 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
158956d3333SMarcel Moolenaar 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
159ba873f4cSAlexander Kabaev 	SIGIO, SIGURG, SIGSYS
160d66a5066SPeter Wemm };
161d66a5066SPeter Wemm 
/* Value reported for any FreeBSD trap code without a Linux equivalent. */
#define LINUX_T_UNKNOWN  255

/*
 * FreeBSD trap code -> Linux trap number.  The slot index is the FreeBSD
 * trap code; slots without a named trap hold LINUX_T_UNKNOWN.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/*  0: (none) */
	6,			/*  1: T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/*  2: (none) */
	3,			/*  3: T_BPTFLT */
	LINUX_T_UNKNOWN,	/*  4: (none) */
	LINUX_T_UNKNOWN,	/*  5: (none) */
	16,			/*  6: T_ARITHTRAP */
	254,			/*  7: T_ASTFLT */
	LINUX_T_UNKNOWN,	/*  8: (none) */
	13,			/*  9: T_PROTFLT */
	1,			/* 10: T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11: (none) */
	14,			/* 12: T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13: (none) */
	17,			/* 14: T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15: (none) */
	LINUX_T_UNKNOWN,	/* 16: (none) */
	LINUX_T_UNKNOWN,	/* 17: (none) */
	0,			/* 18: T_DIVIDE */
	2,			/* 19: T_NMI */
	4,			/* 20: T_OFLOW */
	5,			/* 21: T_BOUND */
	7,			/* 22: T_DNA */
	8,			/* 23: T_DOUBLEFLT */
	9,			/* 24: T_FPOPFLT */
	10,			/* 25: T_TSSFLT */
	11,			/* 26: T_SEGNPFLT */
	12,			/* 27: T_STKFLT */
	18,			/* 28: T_MCHK */
	19,			/* 29: T_XMMFLT */
	15			/* 30: T_RESERVED */
};

/*
 * Look up `code' in the table above; anything past the end of the table
 * yields LINUX_T_UNKNOWN.  The comparison is made against an unsigned
 * element count, so a negative int argument is out of range as well.
 * Note that `code' is evaluated more than once.
 */
#define bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
20027a828fcSPierre Beyssac 
201288078beSEivind Eklund /*
202288078beSEivind Eklund  * If FreeBSD & Linux have a difference of opinion about what a trap
203288078beSEivind Eklund  * means, deal with it here.
204356861dbSMatthew Dillon  *
205356861dbSMatthew Dillon  * MPSAFE
206288078beSEivind Eklund  */
207288078beSEivind Eklund static int
208288078beSEivind Eklund translate_traps(int signal, int trap_code)
209288078beSEivind Eklund {
210d563a53aSEivind Eklund 	if (signal != SIGBUS)
211d563a53aSEivind Eklund 		return signal;
212288078beSEivind Eklund 	switch (trap_code) {
213288078beSEivind Eklund 	case T_PROTFLT:
214288078beSEivind Eklund 	case T_TSSFLT:
215288078beSEivind Eklund 	case T_DOUBLEFLT:
216288078beSEivind Eklund 	case T_PAGEFLT:
217288078beSEivind Eklund 		return SIGSEGV;
218288078beSEivind Eklund 	default:
219288078beSEivind Eklund 		return signal;
220288078beSEivind Eklund 	}
221288078beSEivind Eklund }
222288078beSEivind Eklund 
223303b270bSEivind Eklund static int
224654f6be1SBruce Evans linux_fixup(register_t **stack_base, struct image_params *imgp)
225d66a5066SPeter Wemm {
226654f6be1SBruce Evans 	register_t *argv, *envp;
227d66a5066SPeter Wemm 
228d66a5066SPeter Wemm 	argv = *stack_base;
229610ecfe0SMaxim Sobolev 	envp = *stack_base + (imgp->args->argc + 1);
230d66a5066SPeter Wemm 	(*stack_base)--;
23186a14a7aSBruce Evans 	**stack_base = (intptr_t)(void *)envp;
232d66a5066SPeter Wemm 	(*stack_base)--;
23386a14a7aSBruce Evans 	**stack_base = (intptr_t)(void *)argv;
234d66a5066SPeter Wemm 	(*stack_base)--;
235610ecfe0SMaxim Sobolev 	**stack_base = imgp->args->argc;
2364d7c2e8aSDmitry Chagin 	return (0);
237d66a5066SPeter Wemm }
238d66a5066SPeter Wemm 
239303b270bSEivind Eklund static int
240654f6be1SBruce Evans elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
241e1743d02SSøren Schmidt {
2424d7c2e8aSDmitry Chagin 	struct proc *p;
24343cf129cSJohn Baldwin 	Elf32_Auxargs *args;
2444d7c2e8aSDmitry Chagin 	Elf32_Addr *uplatform;
2454d7c2e8aSDmitry Chagin 	struct ps_strings *arginfo;
246654f6be1SBruce Evans 	register_t *pos;
247d66a5066SPeter Wemm 
2486617724cSJeff Roberson 	KASSERT(curthread->td_proc == imgp->proc,
24943cf129cSJohn Baldwin 	    ("unsafe elf_linux_fixup(), should be curproc"));
2504d7c2e8aSDmitry Chagin 
2514d7c2e8aSDmitry Chagin 	p = imgp->proc;
2524d7c2e8aSDmitry Chagin 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
2534d7c2e8aSDmitry Chagin 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
2544d7c2e8aSDmitry Chagin 	    linux_szplatform);
25543cf129cSJohn Baldwin 	args = (Elf32_Auxargs *)imgp->auxargs;
256610ecfe0SMaxim Sobolev 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
257e1743d02SSøren Schmidt 
2584d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
2598d30f381SDmitry Chagin 
2608d30f381SDmitry Chagin 	/*
2618d30f381SDmitry Chagin 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
2628d30f381SDmitry Chagin 	 * as it has appeared in the 2.4.0-rc7 first time.
2638d30f381SDmitry Chagin 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
2648d30f381SDmitry Chagin 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
2658d30f381SDmitry Chagin 	 * is not present.
2668d30f381SDmitry Chagin 	 * Also see linux_times() implementation.
2678d30f381SDmitry Chagin 	 */
2688d30f381SDmitry Chagin 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
2691ca16454SDmitry Chagin 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
270e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
271e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
272e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
273e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
274e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
275e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
276e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
2774d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
278b1fc0ec1SRobert Watson 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
279b1fc0ec1SRobert Watson 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
280b1fc0ec1SRobert Watson 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
281b1fc0ec1SRobert Watson 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
2824d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
2834d7c2e8aSDmitry Chagin 	if (args->execfd != -1)
2844d7c2e8aSDmitry Chagin 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
285e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_NULL, 0);
286e1743d02SSøren Schmidt 
287e1743d02SSøren Schmidt 	free(imgp->auxargs, M_TEMP);
288e1743d02SSøren Schmidt 	imgp->auxargs = NULL;
289e1743d02SSøren Schmidt 
290e1743d02SSøren Schmidt 	(*stack_base)--;
291610ecfe0SMaxim Sobolev 	**stack_base = (register_t)imgp->args->argc;
2924d7c2e8aSDmitry Chagin 	return (0);
293e1743d02SSøren Schmidt }
294d66a5066SPeter Wemm 
2954d7c2e8aSDmitry Chagin /*
2964d7c2e8aSDmitry Chagin  * Copied from kern/kern_exec.c
2974d7c2e8aSDmitry Chagin  */
2984d7c2e8aSDmitry Chagin static register_t *
2994d7c2e8aSDmitry Chagin linux_copyout_strings(struct image_params *imgp)
3004d7c2e8aSDmitry Chagin {
3014d7c2e8aSDmitry Chagin 	int argc, envc;
3024d7c2e8aSDmitry Chagin 	char **vectp;
3034d7c2e8aSDmitry Chagin 	char *stringp, *destp;
3044d7c2e8aSDmitry Chagin 	register_t *stack_base;
3054d7c2e8aSDmitry Chagin 	struct ps_strings *arginfo;
3064d7c2e8aSDmitry Chagin 	struct proc *p;
3074d7c2e8aSDmitry Chagin 
3084d7c2e8aSDmitry Chagin 	/*
3094d7c2e8aSDmitry Chagin 	 * Calculate string base and vector table pointers.
3104d7c2e8aSDmitry Chagin 	 * Also deal with signal trampoline code for this exec type.
3114d7c2e8aSDmitry Chagin 	 */
3124d7c2e8aSDmitry Chagin 	p = imgp->proc;
3134d7c2e8aSDmitry Chagin 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
3144d7c2e8aSDmitry Chagin 	destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
3154d7c2e8aSDmitry Chagin 	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
3164d7c2e8aSDmitry Chagin 	    sizeof(char *));
3174d7c2e8aSDmitry Chagin 
3184d7c2e8aSDmitry Chagin 	/*
3194d7c2e8aSDmitry Chagin 	 * install sigcode
3204d7c2e8aSDmitry Chagin 	 */
3214d7c2e8aSDmitry Chagin 	copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
3224d7c2e8aSDmitry Chagin 	    linux_szsigcode), linux_szsigcode);
3234d7c2e8aSDmitry Chagin 
3244d7c2e8aSDmitry Chagin 	/*
3254d7c2e8aSDmitry Chagin 	 * install LINUX_PLATFORM
3264d7c2e8aSDmitry Chagin 	 */
3274d7c2e8aSDmitry Chagin 	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
3284d7c2e8aSDmitry Chagin 	    linux_szplatform), linux_szplatform);
3294d7c2e8aSDmitry Chagin 
3304d7c2e8aSDmitry Chagin 	/*
3314d7c2e8aSDmitry Chagin 	 * If we have a valid auxargs ptr, prepare some room
3324d7c2e8aSDmitry Chagin 	 * on the stack.
3334d7c2e8aSDmitry Chagin 	 */
3344d7c2e8aSDmitry Chagin 	if (imgp->auxargs) {
3354d7c2e8aSDmitry Chagin 		/*
3364d7c2e8aSDmitry Chagin 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
3374d7c2e8aSDmitry Chagin 		 * lower compatibility.
3384d7c2e8aSDmitry Chagin 		 */
3394d7c2e8aSDmitry Chagin 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
3404d7c2e8aSDmitry Chagin 		    (LINUX_AT_COUNT * 2);
3414d7c2e8aSDmitry Chagin 		/*
3424d7c2e8aSDmitry Chagin 		 * The '+ 2' is for the null pointers at the end of each of
3434d7c2e8aSDmitry Chagin 		 * the arg and env vector sets,and imgp->auxarg_size is room
3444d7c2e8aSDmitry Chagin 		 * for argument of Runtime loader.
3454d7c2e8aSDmitry Chagin 		 */
3464d7c2e8aSDmitry Chagin 		vectp = (char **)(destp - (imgp->args->argc +
3474d7c2e8aSDmitry Chagin 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
3484d7c2e8aSDmitry Chagin 	} else {
3494d7c2e8aSDmitry Chagin 		/*
3504d7c2e8aSDmitry Chagin 		 * The '+ 2' is for the null pointers at the end of each of
3514d7c2e8aSDmitry Chagin 		 * the arg and env vector sets
3524d7c2e8aSDmitry Chagin 		 */
3534d7c2e8aSDmitry Chagin 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
3544d7c2e8aSDmitry Chagin 		    sizeof(char *));
3554d7c2e8aSDmitry Chagin 	}
3564d7c2e8aSDmitry Chagin 
3574d7c2e8aSDmitry Chagin 	/*
3584d7c2e8aSDmitry Chagin 	 * vectp also becomes our initial stack base
3594d7c2e8aSDmitry Chagin 	 */
3604d7c2e8aSDmitry Chagin 	stack_base = (register_t *)vectp;
3614d7c2e8aSDmitry Chagin 
3624d7c2e8aSDmitry Chagin 	stringp = imgp->args->begin_argv;
3634d7c2e8aSDmitry Chagin 	argc = imgp->args->argc;
3644d7c2e8aSDmitry Chagin 	envc = imgp->args->envc;
3654d7c2e8aSDmitry Chagin 
3664d7c2e8aSDmitry Chagin 	/*
3674d7c2e8aSDmitry Chagin 	 * Copy out strings - arguments and environment.
3684d7c2e8aSDmitry Chagin 	 */
3694d7c2e8aSDmitry Chagin 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
3704d7c2e8aSDmitry Chagin 
3714d7c2e8aSDmitry Chagin 	/*
3724d7c2e8aSDmitry Chagin 	 * Fill in "ps_strings" struct for ps, w, etc.
3734d7c2e8aSDmitry Chagin 	 */
3744d7c2e8aSDmitry Chagin 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
3754d7c2e8aSDmitry Chagin 	suword(&arginfo->ps_nargvstr, argc);
3764d7c2e8aSDmitry Chagin 
3774d7c2e8aSDmitry Chagin 	/*
3784d7c2e8aSDmitry Chagin 	 * Fill in argument portion of vector table.
3794d7c2e8aSDmitry Chagin 	 */
3804d7c2e8aSDmitry Chagin 	for (; argc > 0; --argc) {
3814d7c2e8aSDmitry Chagin 		suword(vectp++, (long)(intptr_t)destp);
3824d7c2e8aSDmitry Chagin 		while (*stringp++ != 0)
3834d7c2e8aSDmitry Chagin 			destp++;
3844d7c2e8aSDmitry Chagin 		destp++;
3854d7c2e8aSDmitry Chagin 	}
3864d7c2e8aSDmitry Chagin 
3874d7c2e8aSDmitry Chagin 	/* a null vector table pointer separates the argp's from the envp's */
3884d7c2e8aSDmitry Chagin 	suword(vectp++, 0);
3894d7c2e8aSDmitry Chagin 
3904d7c2e8aSDmitry Chagin 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
3914d7c2e8aSDmitry Chagin 	suword(&arginfo->ps_nenvstr, envc);
3924d7c2e8aSDmitry Chagin 
3934d7c2e8aSDmitry Chagin 	/*
3944d7c2e8aSDmitry Chagin 	 * Fill in environment portion of vector table.
3954d7c2e8aSDmitry Chagin 	 */
3964d7c2e8aSDmitry Chagin 	for (; envc > 0; --envc) {
3974d7c2e8aSDmitry Chagin 		suword(vectp++, (long)(intptr_t)destp);
3984d7c2e8aSDmitry Chagin 		while (*stringp++ != 0)
3994d7c2e8aSDmitry Chagin 			destp++;
4004d7c2e8aSDmitry Chagin 		destp++;
4014d7c2e8aSDmitry Chagin 	}
4024d7c2e8aSDmitry Chagin 
4034d7c2e8aSDmitry Chagin 	/* end of vector table is a null pointer */
4044d7c2e8aSDmitry Chagin 	suword(vectp, 0);
4054d7c2e8aSDmitry Chagin 
4064d7c2e8aSDmitry Chagin 	return (stack_base);
4074d7c2e8aSDmitry Chagin }
4084d7c2e8aSDmitry Chagin 
4094d7c2e8aSDmitry Chagin 
4104d7c2e8aSDmitry Chagin 
411d66a5066SPeter Wemm extern int _ucodesel, _udatasel;
41202318dacSJake Burkholder extern unsigned long linux_sznonrtsigcode;
41379363394SAndrew Gallatin 
41479363394SAndrew Gallatin static void
4159104847fSDavid Xu linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
41679363394SAndrew Gallatin {
4171d062e2bSDag-Erling Smørgrav 	struct thread *td = curthread;
4181d062e2bSDag-Erling Smørgrav 	struct proc *p = td->td_proc;
41990af4afaSJohn Baldwin 	struct sigacts *psp;
4201d062e2bSDag-Erling Smørgrav 	struct trapframe *regs;
4215002a60fSMarcel Moolenaar 	struct l_rt_sigframe *fp, frame;
4229104847fSDavid Xu 	int sig, code;
42379363394SAndrew Gallatin 	int oonstack;
42479363394SAndrew Gallatin 
4259104847fSDavid Xu 	sig = ksi->ksi_signo;
4269104847fSDavid Xu 	code = ksi->ksi_code;
427df53e91cSJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
42890af4afaSJohn Baldwin 	psp = p->p_sigacts;
42990af4afaSJohn Baldwin 	mtx_assert(&psp->ps_mtx, MA_OWNED);
430b40ce416SJulian Elischer 	regs = td->td_frame;
431d034d459SMarcel Moolenaar 	oonstack = sigonstack(regs->tf_esp);
43279363394SAndrew Gallatin 
43379363394SAndrew Gallatin #ifdef DEBUG
4345002a60fSMarcel Moolenaar 	if (ldebug(rt_sendsig))
435728ef954SJohn Baldwin 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
43624593369SJonathan Lemon 		    catcher, sig, (void*)mask, code);
43779363394SAndrew Gallatin #endif
43879363394SAndrew Gallatin 	/*
43979363394SAndrew Gallatin 	 * Allocate space for the signal handler context.
44079363394SAndrew Gallatin 	 */
441a30ec4b9SDavid Xu 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
44290af4afaSJohn Baldwin 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
443a30ec4b9SDavid Xu 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
444a30ec4b9SDavid Xu 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
445d034d459SMarcel Moolenaar 	} else
4465002a60fSMarcel Moolenaar 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
44790af4afaSJohn Baldwin 	mtx_unlock(&psp->ps_mtx);
44879363394SAndrew Gallatin 
44979363394SAndrew Gallatin 	/*
45079363394SAndrew Gallatin 	 * Build the argument list for the signal handler.
45179363394SAndrew Gallatin 	 */
45279363394SAndrew Gallatin 	if (p->p_sysent->sv_sigtbl)
45379363394SAndrew Gallatin 		if (sig <= p->p_sysent->sv_sigsize)
45479363394SAndrew Gallatin 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
45579363394SAndrew Gallatin 
45699d45c5fSMarcel Moolenaar 	bzero(&frame, sizeof(frame));
45799d45c5fSMarcel Moolenaar 
45879363394SAndrew Gallatin 	frame.sf_handler = catcher;
45979363394SAndrew Gallatin 	frame.sf_sig = sig;
46079363394SAndrew Gallatin 	frame.sf_siginfo = &fp->sf_si;
46179363394SAndrew Gallatin 	frame.sf_ucontext = &fp->sf_sc;
462cc6ca9b3SMarcel Moolenaar 
4639d05b77dSJuli Mallett 	/* Fill in POSIX parts */
464aa8b2011SKonstantin Belousov 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
465cc6ca9b3SMarcel Moolenaar 
46679363394SAndrew Gallatin 	/*
46779363394SAndrew Gallatin 	 * Build the signal context to be used by sigreturn.
46879363394SAndrew Gallatin 	 */
469cc6ca9b3SMarcel Moolenaar 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
470cc6ca9b3SMarcel Moolenaar 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
471cc6ca9b3SMarcel Moolenaar 
472a30ec4b9SDavid Xu 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
473a30ec4b9SDavid Xu 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
474a30ec4b9SDavid Xu 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
475d034d459SMarcel Moolenaar 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
476611d9407SJohn Baldwin 	PROC_UNLOCK(p);
477cc6ca9b3SMarcel Moolenaar 
478cc6ca9b3SMarcel Moolenaar 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
479cc6ca9b3SMarcel Moolenaar 
480cc6ca9b3SMarcel Moolenaar 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
48179363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
48279363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
48379363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
48479363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
48579363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
48679363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
48779363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
48879363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
48979363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
49079363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
49179363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
49279363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
49379363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
49479363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
49579363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
49679363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
49779363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
49896a2b635SKonstantin Belousov 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
49927a828fcSPierre Beyssac 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
50079363394SAndrew Gallatin 
50179363394SAndrew Gallatin #ifdef DEBUG
5025002a60fSMarcel Moolenaar 	if (ldebug(rt_sendsig))
50324593369SJonathan Lemon 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
5049b778a16SDavid Xu 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
5059b778a16SDavid Xu 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
50679363394SAndrew Gallatin #endif
50779363394SAndrew Gallatin 
50879363394SAndrew Gallatin 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
50979363394SAndrew Gallatin 		/*
51079363394SAndrew Gallatin 		 * Process has trashed its stack; give it an illegal
51179363394SAndrew Gallatin 		 * instruction to halt it in its tracks.
51279363394SAndrew Gallatin 		 */
51389734883SAlan Cox #ifdef DEBUG
51489734883SAlan Cox 		if (ldebug(rt_sendsig))
51589734883SAlan Cox 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
51689734883SAlan Cox 			    fp, oonstack);
51789734883SAlan Cox #endif
51819eb87d2SJohn Baldwin 		PROC_LOCK(p);
519b40ce416SJulian Elischer 		sigexit(td, SIGILL);
52079363394SAndrew Gallatin 	}
52179363394SAndrew Gallatin 
52279363394SAndrew Gallatin 	/*
52379363394SAndrew Gallatin 	 * Build context to run handler in.
52479363394SAndrew Gallatin 	 */
52579363394SAndrew Gallatin 	regs->tf_esp = (int)fp;
52679363394SAndrew Gallatin 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
52702318dacSJake Burkholder 	    linux_sznonrtsigcode;
52822eca0bfSKonstantin Belousov 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
52979363394SAndrew Gallatin 	regs->tf_cs = _ucodesel;
53079363394SAndrew Gallatin 	regs->tf_ds = _udatasel;
53179363394SAndrew Gallatin 	regs->tf_es = _udatasel;
53279363394SAndrew Gallatin 	regs->tf_fs = _udatasel;
53379363394SAndrew Gallatin 	regs->tf_ss = _udatasel;
534df53e91cSJohn Baldwin 	PROC_LOCK(p);
53590af4afaSJohn Baldwin 	mtx_lock(&psp->ps_mtx);
53679363394SAndrew Gallatin }
53779363394SAndrew Gallatin 
538d66a5066SPeter Wemm 
539d66a5066SPeter Wemm /*
540d66a5066SPeter Wemm  * Send an interrupt to process.
541d66a5066SPeter Wemm  *
542d66a5066SPeter Wemm  * Stack is set up to allow sigcode stored
543d66a5066SPeter Wemm  * in u. to call routine, followed by kcall
544d66a5066SPeter Wemm  * to sigreturn routine below.  After sigreturn
545d66a5066SPeter Wemm  * resets the signal mask, the stack, and the
546d66a5066SPeter Wemm  * frame pointer, it returns to the user
547d66a5066SPeter Wemm  * specified pc, psl.
548d66a5066SPeter Wemm  */
549303b270bSEivind Eklund static void
5509104847fSDavid Xu linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
551d66a5066SPeter Wemm {
5521d062e2bSDag-Erling Smørgrav 	struct thread *td = curthread;
5531d062e2bSDag-Erling Smørgrav 	struct proc *p = td->td_proc;
55490af4afaSJohn Baldwin 	struct sigacts *psp;
5551d062e2bSDag-Erling Smørgrav 	struct trapframe *regs;
5565002a60fSMarcel Moolenaar 	struct l_sigframe *fp, frame;
5575002a60fSMarcel Moolenaar 	l_sigset_t lmask;
5589104847fSDavid Xu 	int sig, code;
5592c4ab9ddSAndrew Gallatin 	int oonstack, i;
560d66a5066SPeter Wemm 
5612509e6c2SJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
56290af4afaSJohn Baldwin 	psp = p->p_sigacts;
5639104847fSDavid Xu 	sig = ksi->ksi_signo;
5649104847fSDavid Xu 	code = ksi->ksi_code;
56590af4afaSJohn Baldwin 	mtx_assert(&psp->ps_mtx, MA_OWNED);
56690af4afaSJohn Baldwin 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
567cc6ca9b3SMarcel Moolenaar 		/* Signal handler installed with SA_SIGINFO. */
5689104847fSDavid Xu 		linux_rt_sendsig(catcher, ksi, mask);
569cc6ca9b3SMarcel Moolenaar 		return;
570cc6ca9b3SMarcel Moolenaar 	}
571b40ce416SJulian Elischer 	regs = td->td_frame;
572d034d459SMarcel Moolenaar 	oonstack = sigonstack(regs->tf_esp);
573d66a5066SPeter Wemm 
574d66a5066SPeter Wemm #ifdef DEBUG
5755002a60fSMarcel Moolenaar 	if (ldebug(sendsig))
576728ef954SJohn Baldwin 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
57724593369SJonathan Lemon 		    catcher, sig, (void*)mask, code);
578d66a5066SPeter Wemm #endif
57979363394SAndrew Gallatin 
580d66a5066SPeter Wemm 	/*
581d66a5066SPeter Wemm 	 * Allocate space for the signal handler context.
582d66a5066SPeter Wemm 	 */
583a30ec4b9SDavid Xu 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
58490af4afaSJohn Baldwin 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
585a30ec4b9SDavid Xu 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
586a30ec4b9SDavid Xu 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
587d034d459SMarcel Moolenaar 	} else
5885002a60fSMarcel Moolenaar 		fp = (struct l_sigframe *)regs->tf_esp - 1;
58990af4afaSJohn Baldwin 	mtx_unlock(&psp->ps_mtx);
590611d9407SJohn Baldwin 	PROC_UNLOCK(p);
591d66a5066SPeter Wemm 
592d66a5066SPeter Wemm 	/*
593d66a5066SPeter Wemm 	 * Build the argument list for the signal handler.
594d66a5066SPeter Wemm 	 */
595956d3333SMarcel Moolenaar 	if (p->p_sysent->sv_sigtbl)
596956d3333SMarcel Moolenaar 		if (sig <= p->p_sysent->sv_sigsize)
597956d3333SMarcel Moolenaar 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
598d66a5066SPeter Wemm 
59999d45c5fSMarcel Moolenaar 	bzero(&frame, sizeof(frame));
60099d45c5fSMarcel Moolenaar 
601d66a5066SPeter Wemm 	frame.sf_handler = catcher;
602d66a5066SPeter Wemm 	frame.sf_sig = sig;
603d66a5066SPeter Wemm 
604cc6ca9b3SMarcel Moolenaar 	bsd_to_linux_sigset(mask, &lmask);
605cc6ca9b3SMarcel Moolenaar 
606d66a5066SPeter Wemm 	/*
607d66a5066SPeter Wemm 	 * Build the signal context to be used by sigreturn.
608d66a5066SPeter Wemm 	 */
609cc6ca9b3SMarcel Moolenaar 	frame.sf_sc.sc_mask   = lmask.__bits[0];
6105206bca1SLuoqi Chen 	frame.sf_sc.sc_gs     = rgs();
6115206bca1SLuoqi Chen 	frame.sf_sc.sc_fs     = regs->tf_fs;
612213fdd80SPeter Wemm 	frame.sf_sc.sc_es     = regs->tf_es;
613213fdd80SPeter Wemm 	frame.sf_sc.sc_ds     = regs->tf_ds;
614213fdd80SPeter Wemm 	frame.sf_sc.sc_edi    = regs->tf_edi;
615213fdd80SPeter Wemm 	frame.sf_sc.sc_esi    = regs->tf_esi;
616213fdd80SPeter Wemm 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
617213fdd80SPeter Wemm 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
618213fdd80SPeter Wemm 	frame.sf_sc.sc_edx    = regs->tf_edx;
619213fdd80SPeter Wemm 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
620213fdd80SPeter Wemm 	frame.sf_sc.sc_eax    = regs->tf_eax;
621213fdd80SPeter Wemm 	frame.sf_sc.sc_eip    = regs->tf_eip;
622213fdd80SPeter Wemm 	frame.sf_sc.sc_cs     = regs->tf_cs;
623213fdd80SPeter Wemm 	frame.sf_sc.sc_eflags = regs->tf_eflags;
624213fdd80SPeter Wemm 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
625213fdd80SPeter Wemm 	frame.sf_sc.sc_ss     = regs->tf_ss;
626213fdd80SPeter Wemm 	frame.sf_sc.sc_err    = regs->tf_err;
62796a2b635SKonstantin Belousov 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
6289104847fSDavid Xu 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
629cc6ca9b3SMarcel Moolenaar 
6302c4ab9ddSAndrew Gallatin 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
631cc6ca9b3SMarcel Moolenaar 		frame.sf_extramask[i] = lmask.__bits[i+1];
632d66a5066SPeter Wemm 
633d66a5066SPeter Wemm 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
634d66a5066SPeter Wemm 		/*
635d66a5066SPeter Wemm 		 * Process has trashed its stack; give it an illegal
636d66a5066SPeter Wemm 		 * instruction to halt it in its tracks.
637d66a5066SPeter Wemm 		 */
63819eb87d2SJohn Baldwin 		PROC_LOCK(p);
639b40ce416SJulian Elischer 		sigexit(td, SIGILL);
640d66a5066SPeter Wemm 	}
641d66a5066SPeter Wemm 
642d66a5066SPeter Wemm 	/*
643d66a5066SPeter Wemm 	 * Build context to run handler in.
644d66a5066SPeter Wemm 	 */
645213fdd80SPeter Wemm 	regs->tf_esp = (int)fp;
6464c56fcdeSBruce Evans 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
64722eca0bfSKonstantin Belousov 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
648213fdd80SPeter Wemm 	regs->tf_cs = _ucodesel;
649213fdd80SPeter Wemm 	regs->tf_ds = _udatasel;
650213fdd80SPeter Wemm 	regs->tf_es = _udatasel;
6515206bca1SLuoqi Chen 	regs->tf_fs = _udatasel;
652213fdd80SPeter Wemm 	regs->tf_ss = _udatasel;
6535002a60fSMarcel Moolenaar 	PROC_LOCK(p);
65490af4afaSJohn Baldwin 	mtx_lock(&psp->ps_mtx);
655d66a5066SPeter Wemm }
656d66a5066SPeter Wemm 
657d66a5066SPeter Wemm /*
658d66a5066SPeter Wemm  * System call to cleanup state after a signal
659d66a5066SPeter Wemm  * has been taken.  Reset signal mask and
660d66a5066SPeter Wemm  * stack state from context left by sendsig (above).
661d66a5066SPeter Wemm  * Return to previous pc and psl as specified by
662d66a5066SPeter Wemm  * context left by sendsig. Check carefully to
663d66a5066SPeter Wemm  * make sure that the user has not modified the
664d66a5066SPeter Wemm  * psl to gain improper privileges or to cause
665d66a5066SPeter Wemm  * a machine fault.
666d66a5066SPeter Wemm  */
667d66a5066SPeter Wemm int
668b07cd97eSMark Murray linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
669d66a5066SPeter Wemm {
6705002a60fSMarcel Moolenaar 	struct l_sigframe frame;
6711d062e2bSDag-Erling Smørgrav 	struct trapframe *regs;
6725002a60fSMarcel Moolenaar 	l_sigset_t lmask;
673d6e029adSKonstantin Belousov 	sigset_t bmask;
6742c4ab9ddSAndrew Gallatin 	int eflags, i;
6759104847fSDavid Xu 	ksiginfo_t ksi;
676d66a5066SPeter Wemm 
677b40ce416SJulian Elischer 	regs = td->td_frame;
678d66a5066SPeter Wemm 
679d66a5066SPeter Wemm #ifdef DEBUG
68024593369SJonathan Lemon 	if (ldebug(sigreturn))
68124593369SJonathan Lemon 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
682d66a5066SPeter Wemm #endif
683d66a5066SPeter Wemm 	/*
684cc6ca9b3SMarcel Moolenaar 	 * The trampoline code hands us the sigframe.
685d66a5066SPeter Wemm 	 * It is unsafe to keep track of it ourselves, in the event that a
686d66a5066SPeter Wemm 	 * program jumps out of a signal handler.
687d66a5066SPeter Wemm 	 */
6884b7ef73dSDag-Erling Smørgrav 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
689d66a5066SPeter Wemm 		return (EFAULT);
690d66a5066SPeter Wemm 
691d66a5066SPeter Wemm 	/*
692d66a5066SPeter Wemm 	 * Check for security violations.
693d66a5066SPeter Wemm 	 */
694d66a5066SPeter Wemm #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
695cc6ca9b3SMarcel Moolenaar 	eflags = frame.sf_sc.sc_eflags;
696d66a5066SPeter Wemm 	/*
697d66a5066SPeter Wemm 	 * XXX do allow users to change the privileged flag PSL_RF.  The
698d66a5066SPeter Wemm 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
699d66a5066SPeter Wemm 	 * sometimes set it there too.  tf_eflags is kept in the signal
700d66a5066SPeter Wemm 	 * context during signal handling and there is no other place
701d66a5066SPeter Wemm 	 * to remember it, so the PSL_RF bit may be corrupted by the
702d66a5066SPeter Wemm 	 * signal handler without us knowing.  Corruption of the PSL_RF
703d66a5066SPeter Wemm 	 * bit at worst causes one more or one less debugger trap, so
704d66a5066SPeter Wemm 	 * allowing it is fairly harmless.
705d66a5066SPeter Wemm 	 */
706b07cd97eSMark Murray 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
707d66a5066SPeter Wemm 		return(EINVAL);
708d66a5066SPeter Wemm 
709d66a5066SPeter Wemm 	/*
710d66a5066SPeter Wemm 	 * Don't allow users to load a valid privileged %cs.  Let the
711d66a5066SPeter Wemm 	 * hardware check for invalid selectors, excess privilege in
712d66a5066SPeter Wemm 	 * other selectors, invalid %eip's and invalid %esp's.
713d66a5066SPeter Wemm 	 */
71440d50994SPhilippe Charnier #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
715cc6ca9b3SMarcel Moolenaar 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
7169104847fSDavid Xu 		ksiginfo_init_trap(&ksi);
7179104847fSDavid Xu 		ksi.ksi_signo = SIGBUS;
7189104847fSDavid Xu 		ksi.ksi_code = BUS_OBJERR;
7199104847fSDavid Xu 		ksi.ksi_trapno = T_PROTFLT;
7209104847fSDavid Xu 		ksi.ksi_addr = (void *)regs->tf_eip;
7219104847fSDavid Xu 		trapsignal(td, &ksi);
722d66a5066SPeter Wemm 		return(EINVAL);
723d66a5066SPeter Wemm 	}
724d66a5066SPeter Wemm 
725cc6ca9b3SMarcel Moolenaar 	lmask.__bits[0] = frame.sf_sc.sc_mask;
7262c4ab9ddSAndrew Gallatin 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
727cc6ca9b3SMarcel Moolenaar 		lmask.__bits[i+1] = frame.sf_extramask[i];
728d6e029adSKonstantin Belousov 	linux_to_bsd_sigset(&lmask, &bmask);
729d6e029adSKonstantin Belousov 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
730956d3333SMarcel Moolenaar 
731d66a5066SPeter Wemm 	/*
732d66a5066SPeter Wemm 	 * Restore signal context.
733d66a5066SPeter Wemm 	 */
7345206bca1SLuoqi Chen 	/* %gs was restored by the trampoline. */
735cc6ca9b3SMarcel Moolenaar 	regs->tf_fs     = frame.sf_sc.sc_fs;
736cc6ca9b3SMarcel Moolenaar 	regs->tf_es     = frame.sf_sc.sc_es;
737cc6ca9b3SMarcel Moolenaar 	regs->tf_ds     = frame.sf_sc.sc_ds;
738cc6ca9b3SMarcel Moolenaar 	regs->tf_edi    = frame.sf_sc.sc_edi;
739cc6ca9b3SMarcel Moolenaar 	regs->tf_esi    = frame.sf_sc.sc_esi;
740cc6ca9b3SMarcel Moolenaar 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
741cc6ca9b3SMarcel Moolenaar 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
742cc6ca9b3SMarcel Moolenaar 	regs->tf_edx    = frame.sf_sc.sc_edx;
743cc6ca9b3SMarcel Moolenaar 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
744cc6ca9b3SMarcel Moolenaar 	regs->tf_eax    = frame.sf_sc.sc_eax;
745cc6ca9b3SMarcel Moolenaar 	regs->tf_eip    = frame.sf_sc.sc_eip;
746cc6ca9b3SMarcel Moolenaar 	regs->tf_cs     = frame.sf_sc.sc_cs;
747213fdd80SPeter Wemm 	regs->tf_eflags = eflags;
748cc6ca9b3SMarcel Moolenaar 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
749cc6ca9b3SMarcel Moolenaar 	regs->tf_ss     = frame.sf_sc.sc_ss;
750d66a5066SPeter Wemm 
751d66a5066SPeter Wemm 	return (EJUSTRETURN);
752d66a5066SPeter Wemm }
753d66a5066SPeter Wemm 
75479363394SAndrew Gallatin /*
75579363394SAndrew Gallatin  * System call to cleanup state after a signal
75679363394SAndrew Gallatin  * has been taken.  Reset signal mask and
75779363394SAndrew Gallatin  * stack state from context left by rt_sendsig (above).
75879363394SAndrew Gallatin  * Return to previous pc and psl as specified by
75979363394SAndrew Gallatin  * context left by sendsig. Check carefully to
76079363394SAndrew Gallatin  * make sure that the user has not modified the
76179363394SAndrew Gallatin  * psl to gain improper privileges or to cause
76279363394SAndrew Gallatin  * a machine fault.
76379363394SAndrew Gallatin  */
76479363394SAndrew Gallatin int
765b07cd97eSMark Murray linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
76679363394SAndrew Gallatin {
7675002a60fSMarcel Moolenaar 	struct l_ucontext uc;
7685002a60fSMarcel Moolenaar 	struct l_sigcontext *context;
769d6e029adSKonstantin Belousov 	sigset_t bmask;
7705002a60fSMarcel Moolenaar 	l_stack_t *lss;
771206a5d3aSIan Dowse 	stack_t ss;
7721d062e2bSDag-Erling Smørgrav 	struct trapframe *regs;
77379363394SAndrew Gallatin 	int eflags;
7749104847fSDavid Xu 	ksiginfo_t ksi;
77579363394SAndrew Gallatin 
776b40ce416SJulian Elischer 	regs = td->td_frame;
77779363394SAndrew Gallatin 
77879363394SAndrew Gallatin #ifdef DEBUG
77924593369SJonathan Lemon 	if (ldebug(rt_sigreturn))
78024593369SJonathan Lemon 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
78179363394SAndrew Gallatin #endif
78279363394SAndrew Gallatin 	/*
783cc6ca9b3SMarcel Moolenaar 	 * The trampoline code hands us the ucontext.
78479363394SAndrew Gallatin 	 * It is unsafe to keep track of it ourselves, in the event that a
78579363394SAndrew Gallatin 	 * program jumps out of a signal handler.
78679363394SAndrew Gallatin 	 */
7874b7ef73dSDag-Erling Smørgrav 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
78879363394SAndrew Gallatin 		return (EFAULT);
78979363394SAndrew Gallatin 
79079363394SAndrew Gallatin 	context = &uc.uc_mcontext;
79179363394SAndrew Gallatin 
79279363394SAndrew Gallatin 	/*
79379363394SAndrew Gallatin 	 * Check for security violations.
79479363394SAndrew Gallatin 	 */
79579363394SAndrew Gallatin #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
79679363394SAndrew Gallatin 	eflags = context->sc_eflags;
79779363394SAndrew Gallatin 	/*
79879363394SAndrew Gallatin 	 * XXX do allow users to change the privileged flag PSL_RF.  The
79979363394SAndrew Gallatin 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
80079363394SAndrew Gallatin 	 * sometimes set it there too.  tf_eflags is kept in the signal
80179363394SAndrew Gallatin 	 * context during signal handling and there is no other place
80279363394SAndrew Gallatin 	 * to remember it, so the PSL_RF bit may be corrupted by the
80379363394SAndrew Gallatin 	 * signal handler without us knowing.  Corruption of the PSL_RF
80479363394SAndrew Gallatin 	 * bit at worst causes one more or one less debugger trap, so
80579363394SAndrew Gallatin 	 * allowing it is fairly harmless.
80679363394SAndrew Gallatin 	 */
807b07cd97eSMark Murray 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
80879363394SAndrew Gallatin 		return(EINVAL);
80979363394SAndrew Gallatin 
81079363394SAndrew Gallatin 	/*
81179363394SAndrew Gallatin 	 * Don't allow users to load a valid privileged %cs.  Let the
81279363394SAndrew Gallatin 	 * hardware check for invalid selectors, excess privilege in
81379363394SAndrew Gallatin 	 * other selectors, invalid %eip's and invalid %esp's.
81479363394SAndrew Gallatin 	 */
81579363394SAndrew Gallatin #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
81679363394SAndrew Gallatin 	if (!CS_SECURE(context->sc_cs)) {
8179104847fSDavid Xu 		ksiginfo_init_trap(&ksi);
8189104847fSDavid Xu 		ksi.ksi_signo = SIGBUS;
8199104847fSDavid Xu 		ksi.ksi_code = BUS_OBJERR;
8209104847fSDavid Xu 		ksi.ksi_trapno = T_PROTFLT;
8219104847fSDavid Xu 		ksi.ksi_addr = (void *)regs->tf_eip;
8229104847fSDavid Xu 		trapsignal(td, &ksi);
82379363394SAndrew Gallatin 		return(EINVAL);
82479363394SAndrew Gallatin 	}
82579363394SAndrew Gallatin 
826d6e029adSKonstantin Belousov 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
827d6e029adSKonstantin Belousov 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
82879363394SAndrew Gallatin 
82979363394SAndrew Gallatin 	/*
830cc6ca9b3SMarcel Moolenaar 	 * Restore signal context
83179363394SAndrew Gallatin 	 */
83279363394SAndrew Gallatin 	/* %gs was restored by the trampoline. */
83379363394SAndrew Gallatin 	regs->tf_fs     = context->sc_fs;
83479363394SAndrew Gallatin 	regs->tf_es     = context->sc_es;
83579363394SAndrew Gallatin 	regs->tf_ds     = context->sc_ds;
83679363394SAndrew Gallatin 	regs->tf_edi    = context->sc_edi;
83779363394SAndrew Gallatin 	regs->tf_esi    = context->sc_esi;
83879363394SAndrew Gallatin 	regs->tf_ebp    = context->sc_ebp;
83979363394SAndrew Gallatin 	regs->tf_ebx    = context->sc_ebx;
84079363394SAndrew Gallatin 	regs->tf_edx    = context->sc_edx;
84179363394SAndrew Gallatin 	regs->tf_ecx    = context->sc_ecx;
84279363394SAndrew Gallatin 	regs->tf_eax    = context->sc_eax;
84379363394SAndrew Gallatin 	regs->tf_eip    = context->sc_eip;
84479363394SAndrew Gallatin 	regs->tf_cs     = context->sc_cs;
84579363394SAndrew Gallatin 	regs->tf_eflags = eflags;
84679363394SAndrew Gallatin 	regs->tf_esp    = context->sc_esp_at_signal;
84779363394SAndrew Gallatin 	regs->tf_ss     = context->sc_ss;
84879363394SAndrew Gallatin 
84979363394SAndrew Gallatin 	/*
85079363394SAndrew Gallatin 	 * call sigaltstack & ignore results..
85179363394SAndrew Gallatin 	 */
85279363394SAndrew Gallatin 	lss = &uc.uc_stack;
853206a5d3aSIan Dowse 	ss.ss_sp = lss->ss_sp;
854206a5d3aSIan Dowse 	ss.ss_size = lss->ss_size;
855206a5d3aSIan Dowse 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
85679363394SAndrew Gallatin 
85779363394SAndrew Gallatin #ifdef DEBUG
85824593369SJonathan Lemon 	if (ldebug(rt_sigreturn))
85924593369SJonathan Lemon 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
860206a5d3aSIan Dowse 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
86179363394SAndrew Gallatin #endif
862206a5d3aSIan Dowse 	(void)kern_sigaltstack(td, &ss, NULL);
86379363394SAndrew Gallatin 
86479363394SAndrew Gallatin 	return (EJUSTRETURN);
86579363394SAndrew Gallatin }
86679363394SAndrew Gallatin 
867356861dbSMatthew Dillon /*
868356861dbSMatthew Dillon  * MPSAFE
869356861dbSMatthew Dillon  */
870303b270bSEivind Eklund static void
871d66a5066SPeter Wemm linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
872d66a5066SPeter Wemm {
873d66a5066SPeter Wemm 	args[0] = tf->tf_ebx;
874d66a5066SPeter Wemm 	args[1] = tf->tf_ecx;
875d66a5066SPeter Wemm 	args[2] = tf->tf_edx;
876d66a5066SPeter Wemm 	args[3] = tf->tf_esi;
877d66a5066SPeter Wemm 	args[4] = tf->tf_edi;
8787646aefcSPeter Wemm 	args[5] = tf->tf_ebp;	/* Unconfirmed */
879d66a5066SPeter Wemm 	*params = NULL;		/* no copyin */
880d66a5066SPeter Wemm }
881d66a5066SPeter Wemm 
882d323ddf3SMatthew Dillon /*
883d323ddf3SMatthew Dillon  * If a linux binary is exec'ing something, try this image activator
884d323ddf3SMatthew Dillon  * first.  We override standard shell script execution in order to
885d323ddf3SMatthew Dillon  * be able to modify the interpreter path.  We only do this if a linux
886d323ddf3SMatthew Dillon  * binary is doing the exec, so we do not create an EXEC module for it.
887d323ddf3SMatthew Dillon  */
88889c9a483SAlfred Perlstein static int	exec_linux_imgact_try(struct image_params *iparams);
889d323ddf3SMatthew Dillon 
890d323ddf3SMatthew Dillon static int
891b07cd97eSMark Murray exec_linux_imgact_try(struct image_params *imgp)
892d323ddf3SMatthew Dillon {
893d323ddf3SMatthew Dillon     const char *head = (const char *)imgp->image_header;
8940311233eSJohn Baldwin     char *rpath;
8950311233eSJohn Baldwin     int error = -1, len;
896d323ddf3SMatthew Dillon 
897d323ddf3SMatthew Dillon     /*
898d323ddf3SMatthew Dillon      * The interpreter for shell scripts run from a linux binary needs
899d323ddf3SMatthew Dillon      * to be located in /compat/linux if possible in order to recursively
900d323ddf3SMatthew Dillon      * maintain linux path emulation.
901d323ddf3SMatthew Dillon      */
902d323ddf3SMatthew Dillon     if (((const short *)head)[0] == SHELLMAGIC) {
903d323ddf3SMatthew Dillon 	    /*
904d323ddf3SMatthew Dillon 	     * Run our normal shell image activator.  If it succeeds attempt
905d323ddf3SMatthew Dillon 	     * to use the alternate path for the interpreter.  If an alternate
906d323ddf3SMatthew Dillon 	     * path is found, use our stringspace to store it.
907d323ddf3SMatthew Dillon 	     */
908d323ddf3SMatthew Dillon 	    if ((error = exec_shell_imgact(imgp)) == 0) {
9090311233eSJohn Baldwin 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
91048b05c3fSKonstantin Belousov 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
9110311233eSJohn Baldwin 		    if (rpath != NULL) {
9120311233eSJohn Baldwin 			    len = strlen(rpath) + 1;
913d323ddf3SMatthew Dillon 
914d323ddf3SMatthew Dillon 			    if (len <= MAXSHELLCMDLEN) {
915d323ddf3SMatthew Dillon 				    memcpy(imgp->interpreter_name, rpath, len);
916d323ddf3SMatthew Dillon 			    }
917d323ddf3SMatthew Dillon 			    free(rpath, M_TEMP);
918d323ddf3SMatthew Dillon 		    }
919d323ddf3SMatthew Dillon 	    }
920d323ddf3SMatthew Dillon     }
921d323ddf3SMatthew Dillon     return(error);
922d323ddf3SMatthew Dillon }
923d323ddf3SMatthew Dillon 
924598d45beSMatthew N. Dodd /*
925598d45beSMatthew N. Dodd  * exec_setregs may initialize some registers differently than Linux
926598d45beSMatthew N. Dodd  * does, thus potentially confusing Linux binaries. If necessary, we
927598d45beSMatthew N. Dodd  * override the exec_setregs default(s) here.
928598d45beSMatthew N. Dodd  */
929598d45beSMatthew N. Dodd static void
930598d45beSMatthew N. Dodd exec_linux_setregs(struct thread *td, u_long entry,
931598d45beSMatthew N. Dodd 		   u_long stack, u_long ps_strings)
932598d45beSMatthew N. Dodd {
933598d45beSMatthew N. Dodd 	struct pcb *pcb = td->td_pcb;
934598d45beSMatthew N. Dodd 
935598d45beSMatthew N. Dodd 	exec_setregs(td, entry, stack, ps_strings);
936598d45beSMatthew N. Dodd 
937598d45beSMatthew N. Dodd 	/* Linux sets %gs to 0, we default to _udatasel */
9382ee8325fSJohn Baldwin 	pcb->pcb_gs = 0;
9392ee8325fSJohn Baldwin 	load_gs(0);
9402a51b9b0SDavid Schultz 
9412ee8325fSJohn Baldwin 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
942598d45beSMatthew N. Dodd }
943598d45beSMatthew N. Dodd 
9444d7c2e8aSDmitry Chagin static void
9454d7c2e8aSDmitry Chagin linux_get_machine(const char **dst)
9464d7c2e8aSDmitry Chagin {
9474d7c2e8aSDmitry Chagin 
9484d7c2e8aSDmitry Chagin 	switch (cpu_class) {
9494d7c2e8aSDmitry Chagin 	case CPUCLASS_686:
9504d7c2e8aSDmitry Chagin 		*dst = "i686";
9514d7c2e8aSDmitry Chagin 		break;
9524d7c2e8aSDmitry Chagin 	case CPUCLASS_586:
9534d7c2e8aSDmitry Chagin 		*dst = "i586";
9544d7c2e8aSDmitry Chagin 		break;
9554d7c2e8aSDmitry Chagin 	case CPUCLASS_486:
9564d7c2e8aSDmitry Chagin 		*dst = "i486";
9574d7c2e8aSDmitry Chagin 		break;
9584d7c2e8aSDmitry Chagin 	default:
9594d7c2e8aSDmitry Chagin 		*dst = "i386";
9604d7c2e8aSDmitry Chagin 	}
9614d7c2e8aSDmitry Chagin }
9624d7c2e8aSDmitry Chagin 
963d66a5066SPeter Wemm struct sysentvec linux_sysvec = {
964a8d403e1SKonstantin Belousov 	.sv_size	= LINUX_SYS_MAXSYSCALL,
965a8d403e1SKonstantin Belousov 	.sv_table	= linux_sysent,
966a8d403e1SKonstantin Belousov 	.sv_mask	= 0,
967a8d403e1SKonstantin Belousov 	.sv_sigsize	= LINUX_SIGTBLSZ,
968a8d403e1SKonstantin Belousov 	.sv_sigtbl	= bsd_to_linux_signal,
969a8d403e1SKonstantin Belousov 	.sv_errsize	= ELAST + 1,
970a8d403e1SKonstantin Belousov 	.sv_errtbl	= bsd_to_linux_errno,
971a8d403e1SKonstantin Belousov 	.sv_transtrap	= translate_traps,
972a8d403e1SKonstantin Belousov 	.sv_fixup	= linux_fixup,
973a8d403e1SKonstantin Belousov 	.sv_sendsig	= linux_sendsig,
974a8d403e1SKonstantin Belousov 	.sv_sigcode	= linux_sigcode,
975a8d403e1SKonstantin Belousov 	.sv_szsigcode	= &linux_szsigcode,
976a8d403e1SKonstantin Belousov 	.sv_prepsyscall	= linux_prepsyscall,
977a8d403e1SKonstantin Belousov 	.sv_name	= "Linux a.out",
978a8d403e1SKonstantin Belousov 	.sv_coredump	= NULL,
979a8d403e1SKonstantin Belousov 	.sv_imgact_try	= exec_linux_imgact_try,
980a8d403e1SKonstantin Belousov 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
981a8d403e1SKonstantin Belousov 	.sv_pagesize	= PAGE_SIZE,
982a8d403e1SKonstantin Belousov 	.sv_minuser	= VM_MIN_ADDRESS,
983a8d403e1SKonstantin Belousov 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
984a8d403e1SKonstantin Belousov 	.sv_usrstack	= USRSTACK,
985a8d403e1SKonstantin Belousov 	.sv_psstrings	= PS_STRINGS,
986a8d403e1SKonstantin Belousov 	.sv_stackprot	= VM_PROT_ALL,
987a8d403e1SKonstantin Belousov 	.sv_copyout_strings = exec_copyout_strings,
988a8d403e1SKonstantin Belousov 	.sv_setregs	= exec_linux_setregs,
989a8d403e1SKonstantin Belousov 	.sv_fixlimit	= NULL,
990b4cf0e62SKonstantin Belousov 	.sv_maxssiz	= NULL,
991b4cf0e62SKonstantin Belousov 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32
992d66a5066SPeter Wemm };
993e1743d02SSøren Schmidt 
994e1743d02SSøren Schmidt struct sysentvec elf_linux_sysvec = {
995a8d403e1SKonstantin Belousov 	.sv_size	= LINUX_SYS_MAXSYSCALL,
996a8d403e1SKonstantin Belousov 	.sv_table	= linux_sysent,
997a8d403e1SKonstantin Belousov 	.sv_mask	= 0,
998a8d403e1SKonstantin Belousov 	.sv_sigsize	= LINUX_SIGTBLSZ,
999a8d403e1SKonstantin Belousov 	.sv_sigtbl	= bsd_to_linux_signal,
1000a8d403e1SKonstantin Belousov 	.sv_errsize	= ELAST + 1,
1001a8d403e1SKonstantin Belousov 	.sv_errtbl	= bsd_to_linux_errno,
1002a8d403e1SKonstantin Belousov 	.sv_transtrap	= translate_traps,
1003a8d403e1SKonstantin Belousov 	.sv_fixup	= elf_linux_fixup,
1004a8d403e1SKonstantin Belousov 	.sv_sendsig	= linux_sendsig,
1005a8d403e1SKonstantin Belousov 	.sv_sigcode	= linux_sigcode,
1006a8d403e1SKonstantin Belousov 	.sv_szsigcode	= &linux_szsigcode,
1007a8d403e1SKonstantin Belousov 	.sv_prepsyscall	= linux_prepsyscall,
1008a8d403e1SKonstantin Belousov 	.sv_name	= "Linux ELF",
1009a8d403e1SKonstantin Belousov 	.sv_coredump	= elf32_coredump,
1010a8d403e1SKonstantin Belousov 	.sv_imgact_try	= exec_linux_imgact_try,
1011a8d403e1SKonstantin Belousov 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1012a8d403e1SKonstantin Belousov 	.sv_pagesize	= PAGE_SIZE,
1013a8d403e1SKonstantin Belousov 	.sv_minuser	= VM_MIN_ADDRESS,
1014a8d403e1SKonstantin Belousov 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1015a8d403e1SKonstantin Belousov 	.sv_usrstack	= USRSTACK,
1016a8d403e1SKonstantin Belousov 	.sv_psstrings	= PS_STRINGS,
1017a8d403e1SKonstantin Belousov 	.sv_stackprot	= VM_PROT_ALL,
10184d7c2e8aSDmitry Chagin 	.sv_copyout_strings = linux_copyout_strings,
1019a8d403e1SKonstantin Belousov 	.sv_setregs	= exec_linux_setregs,
1020a8d403e1SKonstantin Belousov 	.sv_fixlimit	= NULL,
1021b4cf0e62SKonstantin Belousov 	.sv_maxssiz	= NULL,
1022b4cf0e62SKonstantin Belousov 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32
1023e1743d02SSøren Schmidt };
1024e1743d02SSøren Schmidt 
/* Vendor name expected in the ELF brand note (see linux_brandnote). */
static char GNU_ABI_VENDOR[] = "GNU";
/* Value the first descriptor word must have (see linux_trans_osrel()). */
static int GNULINUX_ABI_DESC = 0;
102789ffc202SBjoern A. Zeeb 
102889ffc202SBjoern A. Zeeb static boolean_t
102989ffc202SBjoern A. Zeeb linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
103089ffc202SBjoern A. Zeeb {
103189ffc202SBjoern A. Zeeb 	const Elf32_Word *desc;
103289ffc202SBjoern A. Zeeb 	uintptr_t p;
103389ffc202SBjoern A. Zeeb 
103489ffc202SBjoern A. Zeeb 	p = (uintptr_t)(note + 1);
103589ffc202SBjoern A. Zeeb 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
103689ffc202SBjoern A. Zeeb 
103789ffc202SBjoern A. Zeeb 	desc = (const Elf32_Word *)p;
103889ffc202SBjoern A. Zeeb 	if (desc[0] != GNULINUX_ABI_DESC)
103989ffc202SBjoern A. Zeeb 		return (FALSE);
104089ffc202SBjoern A. Zeeb 
104189ffc202SBjoern A. Zeeb 	/*
104289ffc202SBjoern A. Zeeb 	 * For linux we encode osrel as follows (see linux_mib.c):
104389ffc202SBjoern A. Zeeb 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
104489ffc202SBjoern A. Zeeb 	 */
104589ffc202SBjoern A. Zeeb 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
104689ffc202SBjoern A. Zeeb 
104789ffc202SBjoern A. Zeeb 	return (TRUE);
104889ffc202SBjoern A. Zeeb }
104932c01de2SDmitry Chagin 
105032c01de2SDmitry Chagin static Elf_Brandnote linux_brandnote = {
105189ffc202SBjoern A. Zeeb 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
105289ffc202SBjoern A. Zeeb 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
105332c01de2SDmitry Chagin 	.hdr.n_type	= 1,
105489ffc202SBjoern A. Zeeb 	.vendor		= GNU_ABI_VENDOR,
105589ffc202SBjoern A. Zeeb 	.flags		= BN_TRANSLATE_OSREL,
105689ffc202SBjoern A. Zeeb 	.trans_osrel	= linux_trans_osrel
105732c01de2SDmitry Chagin };
105832c01de2SDmitry Chagin 
1059514058dcSAlexander Langer static Elf32_Brandinfo linux_brand = {
1060a8d403e1SKonstantin Belousov 	.brand		= ELFOSABI_LINUX,
1061a8d403e1SKonstantin Belousov 	.machine	= EM_386,
1062a8d403e1SKonstantin Belousov 	.compat_3_brand	= "Linux",
1063a8d403e1SKonstantin Belousov 	.emul_path	= "/compat/linux",
1064a8d403e1SKonstantin Belousov 	.interp_path	= "/lib/ld-linux.so.1",
1065a8d403e1SKonstantin Belousov 	.sysvec		= &elf_linux_sysvec,
1066a8d403e1SKonstantin Belousov 	.interp_newpath	= NULL,
106732c01de2SDmitry Chagin 	.brand_note	= &linux_brandnote,
1068cd899aadSDmitry Chagin 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
10695cf588ebSPeter Wemm };
10705cf588ebSPeter Wemm 
1071514058dcSAlexander Langer static Elf32_Brandinfo linux_glibc2brand = {
1072a8d403e1SKonstantin Belousov 	.brand		= ELFOSABI_LINUX,
1073a8d403e1SKonstantin Belousov 	.machine	= EM_386,
1074a8d403e1SKonstantin Belousov 	.compat_3_brand	= "Linux",
1075a8d403e1SKonstantin Belousov 	.emul_path	= "/compat/linux",
1076a8d403e1SKonstantin Belousov 	.interp_path	= "/lib/ld-linux.so.2",
1077a8d403e1SKonstantin Belousov 	.sysvec		= &elf_linux_sysvec,
1078a8d403e1SKonstantin Belousov 	.interp_newpath	= NULL,
107932c01de2SDmitry Chagin 	.brand_note	= &linux_brandnote,
1080cd899aadSDmitry Chagin 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
10814e138a28SMike Smith };
10824e138a28SMike Smith 
1083514058dcSAlexander Langer Elf32_Brandinfo *linux_brandlist[] = {
1084514058dcSAlexander Langer 	&linux_brand,
1085514058dcSAlexander Langer 	&linux_glibc2brand,
1086514058dcSAlexander Langer 	NULL
1087514058dcSAlexander Langer };
1088514058dcSAlexander Langer 
1089aa855a59SPeter Wemm static int
1090c25ded31SBruce Evans linux_elf_modevent(module_t mod, int type, void *data)
1091d30ea4f5SPeter Wemm {
1092514058dcSAlexander Langer 	Elf32_Brandinfo **brandinfo;
1093514058dcSAlexander Langer 	int error;
1094f41325dbSPeter Wemm 	struct linux_ioctl_handler **lihp;
1095060e4882SDoug Ambrisko 	struct linux_device_handler **ldhp;
1096514058dcSAlexander Langer 
1097514058dcSAlexander Langer 	error = 0;
1098514058dcSAlexander Langer 
1099aa855a59SPeter Wemm 	switch(type) {
1100aa855a59SPeter Wemm 	case MOD_LOAD:
1101aa855a59SPeter Wemm 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1102aa855a59SPeter Wemm 		     ++brandinfo)
11033ebc1248SPeter Wemm 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1104aa855a59SPeter Wemm 				error = EINVAL;
1105466b14d7SMarcel Moolenaar 		if (error == 0) {
1106f41325dbSPeter Wemm 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1107f41325dbSPeter Wemm 				linux_ioctl_register_handler(*lihp);
1108060e4882SDoug Ambrisko 			SET_FOREACH(ldhp, linux_device_handler_set)
1109060e4882SDoug Ambrisko 				linux_device_register_handler(*ldhp);
1110357afa71SJung-uk Kim 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
11119b44bfc5SAlexander Leidinger 			sx_init(&emul_shared_lock, "emuldata->shared lock");
11129b44bfc5SAlexander Leidinger 			LIST_INIT(&futex_list);
111379262bf1SDmitry Chagin 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
11149b44bfc5SAlexander Leidinger 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
11159b44bfc5SAlexander Leidinger 			      NULL, 1000);
11169b44bfc5SAlexander Leidinger 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
11179b44bfc5SAlexander Leidinger 			      NULL, 1000);
11189b44bfc5SAlexander Leidinger 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
11199b44bfc5SAlexander Leidinger 			      NULL, 1000);
11204d7c2e8aSDmitry Chagin 			linux_get_machine(&linux_platform);
11214d7c2e8aSDmitry Chagin 			linux_szplatform = roundup(strlen(linux_platform) + 1,
11224d7c2e8aSDmitry Chagin 			    sizeof(char *));
11237ae27ff4SJamie Gritton 			linux_osd_jail_register();
11241ca16454SDmitry Chagin 			stclohz = (stathz ? stathz : hz);
112543bef515SMarcel Moolenaar 			if (bootverbose)
1126466b14d7SMarcel Moolenaar 				printf("Linux ELF exec handler installed\n");
1127466b14d7SMarcel Moolenaar 		} else
1128466b14d7SMarcel Moolenaar 			printf("cannot insert Linux ELF brand handler\n");
1129aa855a59SPeter Wemm 		break;
1130aa855a59SPeter Wemm 	case MOD_UNLOAD:
1131aa855a59SPeter Wemm 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1132aa855a59SPeter Wemm 		     ++brandinfo)
11333ebc1248SPeter Wemm 			if (elf32_brand_inuse(*brandinfo))
1134d2758342SMark Newton 				error = EBUSY;
1135d2758342SMark Newton 		if (error == 0) {
1136d2758342SMark Newton 			for (brandinfo = &linux_brandlist[0];
1137d2758342SMark Newton 			     *brandinfo != NULL; ++brandinfo)
11383ebc1248SPeter Wemm 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1139aa855a59SPeter Wemm 					error = EINVAL;
1140d2758342SMark Newton 		}
1141466b14d7SMarcel Moolenaar 		if (error == 0) {
1142f41325dbSPeter Wemm 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1143f41325dbSPeter Wemm 				linux_ioctl_unregister_handler(*lihp);
1144060e4882SDoug Ambrisko 			SET_FOREACH(ldhp, linux_device_handler_set)
1145060e4882SDoug Ambrisko 				linux_device_unregister_handler(*ldhp);
1146357afa71SJung-uk Kim 			mtx_destroy(&emul_lock);
11479b44bfc5SAlexander Leidinger 			sx_destroy(&emul_shared_lock);
114879262bf1SDmitry Chagin 			mtx_destroy(&futex_mtx);
11499b44bfc5SAlexander Leidinger 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
11509b44bfc5SAlexander Leidinger 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
11519b44bfc5SAlexander Leidinger 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
11527ae27ff4SJamie Gritton 			linux_osd_jail_deregister();
1153466b14d7SMarcel Moolenaar 			if (bootverbose)
1154466b14d7SMarcel Moolenaar 				printf("Linux ELF exec handler removed\n");
1155466b14d7SMarcel Moolenaar 		} else
1156aa855a59SPeter Wemm 			printf("Could not deinstall ELF interpreter entry\n");
1157aa855a59SPeter Wemm 		break;
1158aa855a59SPeter Wemm 	default:
11593e019deaSPoul-Henning Kamp 		return EOPNOTSUPP;
1160d30ea4f5SPeter Wemm 	}
1161aa855a59SPeter Wemm 	return error;
1162aa855a59SPeter Wemm }
1163466b14d7SMarcel Moolenaar 
1164aa855a59SPeter Wemm static moduledata_t linux_elf_mod = {
1165aa855a59SPeter Wemm 	"linuxelf",
1166aa855a59SPeter Wemm 	linux_elf_modevent,
1167aa855a59SPeter Wemm 	0
1168aa855a59SPeter Wemm };
1169466b14d7SMarcel Moolenaar 
1170aa855a59SPeter Wemm DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1171