xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 8d30f381ef4262426fae3baff499218555b512b7)
1d66a5066SPeter Wemm /*-
2e1743d02SSøren Schmidt  * Copyright (c) 1994-1996 Søren Schmidt
3d66a5066SPeter Wemm  * All rights reserved.
4d66a5066SPeter Wemm  *
5d66a5066SPeter Wemm  * Redistribution and use in source and binary forms, with or without
6d66a5066SPeter Wemm  * modification, are permitted provided that the following conditions
7d66a5066SPeter Wemm  * are met:
8d66a5066SPeter Wemm  * 1. Redistributions of source code must retain the above copyright
9d66a5066SPeter Wemm  *    notice, this list of conditions and the following disclaimer
10d66a5066SPeter Wemm  *    in this position and unchanged.
11d66a5066SPeter Wemm  * 2. Redistributions in binary form must reproduce the above copyright
12d66a5066SPeter Wemm  *    notice, this list of conditions and the following disclaimer in the
13d66a5066SPeter Wemm  *    documentation and/or other materials provided with the distribution.
14d66a5066SPeter Wemm  * 3. The name of the author may not be used to endorse or promote products
1521dc7d4fSJens Schweikhardt  *    derived from this software without specific prior written permission
16d66a5066SPeter Wemm  *
17d66a5066SPeter Wemm  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18d66a5066SPeter Wemm  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19d66a5066SPeter Wemm  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20d66a5066SPeter Wemm  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21d66a5066SPeter Wemm  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22d66a5066SPeter Wemm  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23d66a5066SPeter Wemm  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24d66a5066SPeter Wemm  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25d66a5066SPeter Wemm  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26d66a5066SPeter Wemm  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27d66a5066SPeter Wemm  */
28d66a5066SPeter Wemm 
2927e0099cSDavid E. O'Brien #include <sys/cdefs.h>
3027e0099cSDavid E. O'Brien __FBSDID("$FreeBSD$");
3127e0099cSDavid E. O'Brien 
32d66a5066SPeter Wemm #include <sys/param.h>
3375f83872SPeter Wemm #include <sys/systm.h>
34ff22c670SBruce Evans #include <sys/exec.h>
3557b4252eSKonstantin Belousov #include <sys/fcntl.h>
36d66a5066SPeter Wemm #include <sys/imgact.h>
3722d4b0fbSJohn Polstra #include <sys/imgact_aout.h>
38e1743d02SSøren Schmidt #include <sys/imgact_elf.h>
39ff22c670SBruce Evans #include <sys/kernel.h>
407106ca0dSJohn Baldwin #include <sys/lock.h>
41e1743d02SSøren Schmidt #include <sys/malloc.h>
42ff22c670SBruce Evans #include <sys/module.h>
4323955314SAlfred Perlstein #include <sys/mutex.h>
44fb919e4dSMark Murray #include <sys/proc.h>
45fb919e4dSMark Murray #include <sys/signalvar.h>
46206a5d3aSIan Dowse #include <sys/syscallsubr.h>
47fb919e4dSMark Murray #include <sys/sysent.h>
48fb919e4dSMark Murray #include <sys/sysproto.h>
49a9148ab1SPeter Wemm #include <sys/vnode.h>
509b44bfc5SAlexander Leidinger #include <sys/eventhandler.h>
51fb919e4dSMark Murray 
52d66a5066SPeter Wemm #include <vm/vm.h>
53a9148ab1SPeter Wemm #include <vm/pmap.h>
54ff22c670SBruce Evans #include <vm/vm_extern.h>
55a9148ab1SPeter Wemm #include <vm/vm_map.h>
56a9148ab1SPeter Wemm #include <vm/vm_object.h>
57ff22c670SBruce Evans #include <vm/vm_page.h>
58ff22c670SBruce Evans #include <vm/vm_param.h>
59ff22c670SBruce Evans 
60ff22c670SBruce Evans #include <machine/cpu.h>
614d7c2e8aSDmitry Chagin #include <machine/cputypes.h>
62ff22c670SBruce Evans #include <machine/md_var.h>
63d3adf769SDavid Schultz #include <machine/pcb.h>
64a9148ab1SPeter Wemm 
65d66a5066SPeter Wemm #include <i386/linux/linux.h>
66ebea8660SMarcel Moolenaar #include <i386/linux/linux_proto.h>
67d789bfd5SDmitry Chagin #include <compat/linux/linux_futex.h>
6894cb2ecfSAlexander Leidinger #include <compat/linux/linux_emul.h>
690f9d6538SJohn Baldwin #include <compat/linux/linux_mib.h>
704d7c2e8aSDmitry Chagin #include <compat/linux/linux_misc.h>
71b595ab37SAndrew Gallatin #include <compat/linux/linux_signal.h>
72322bfdc3SMarcel Moolenaar #include <compat/linux/linux_util.h>
73e1743d02SSøren Schmidt 
741d91482dSPeter Wemm MODULE_VERSION(linux, 1);
751d91482dSPeter Wemm 
7643bef515SMarcel Moolenaar MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
7743bef515SMarcel Moolenaar 
78d323ddf3SMatthew Dillon #if BYTE_ORDER == LITTLE_ENDIAN
79d323ddf3SMatthew Dillon #define SHELLMAGIC      0x2123 /* #! */
80d323ddf3SMatthew Dillon #else
81d323ddf3SMatthew Dillon #define SHELLMAGIC      0x2321
82d323ddf3SMatthew Dillon #endif
83d323ddf3SMatthew Dillon 
84e061a6caSMarcel Moolenaar /*
85e061a6caSMarcel Moolenaar  * Allow the sendsig functions to use the ldebug() facility
86e061a6caSMarcel Moolenaar  * even though they are not syscalls themselves. Map them
87e061a6caSMarcel Moolenaar  * to syscall 0. This is slightly less bogus than using
88e061a6caSMarcel Moolenaar  * ldebug(sigreturn).
89e061a6caSMarcel Moolenaar  */
90e061a6caSMarcel Moolenaar #define	LINUX_SYS_linux_rt_sendsig	0
91e061a6caSMarcel Moolenaar #define	LINUX_SYS_linux_sendsig		0
92e061a6caSMarcel Moolenaar 
9343bef515SMarcel Moolenaar extern char linux_sigcode[];
9443bef515SMarcel Moolenaar extern int linux_szsigcode;
9543bef515SMarcel Moolenaar 
9643bef515SMarcel Moolenaar extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
9743bef515SMarcel Moolenaar 
98f41325dbSPeter Wemm SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99060e4882SDoug Ambrisko SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
10043bef515SMarcel Moolenaar 
10189c9a483SAlfred Perlstein static int	linux_fixup(register_t **stack_base,
10289c9a483SAlfred Perlstein 		    struct image_params *iparams);
10389c9a483SAlfred Perlstein static int	elf_linux_fixup(register_t **stack_base,
10489c9a483SAlfred Perlstein 		    struct image_params *iparams);
105bda2a3afSBruce Evans static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
106bda2a3afSBruce Evans 		    caddr_t *params);
1079104847fSDavid Xu static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
108598d45beSMatthew N. Dodd static void	exec_linux_setregs(struct thread *td, u_long entry,
109598d45beSMatthew N. Dodd 				   u_long stack, u_long ps_strings);
1104d7c2e8aSDmitry Chagin static register_t *linux_copyout_strings(struct image_params *imgp);
1114d7c2e8aSDmitry Chagin 
1124d7c2e8aSDmitry Chagin static int linux_szplatform;
1134d7c2e8aSDmitry Chagin const char *linux_platform;
114d66a5066SPeter Wemm 
1159b44bfc5SAlexander Leidinger static eventhandler_tag linux_exit_tag;
1169b44bfc5SAlexander Leidinger static eventhandler_tag linux_schedtail_tag;
1179b44bfc5SAlexander Leidinger static eventhandler_tag linux_exec_tag;
1189b44bfc5SAlexander Leidinger 
119d66a5066SPeter Wemm /*
120d66a5066SPeter Wemm  * Linux syscalls return negative errno's, we do positive and map them
12150e422f0SAlexander Leidinger  * Reference:
12250e422f0SAlexander Leidinger  *   FreeBSD: src/sys/sys/errno.h
12350e422f0SAlexander Leidinger  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
12450e422f0SAlexander Leidinger  *            linux-2.6.17.8/include/asm-generic/errno.h
125d66a5066SPeter Wemm  */
12685f118c8SDmitrij Tejblum static int bsd_to_linux_errno[ELAST + 1] = {
127d66a5066SPeter Wemm 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
128d66a5066SPeter Wemm 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
129d66a5066SPeter Wemm 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
130d66a5066SPeter Wemm 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
131d66a5066SPeter Wemm 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
132d66a5066SPeter Wemm 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
133d66a5066SPeter Wemm 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
134d66a5066SPeter Wemm 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
13550e422f0SAlexander Leidinger 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
13650e422f0SAlexander Leidinger 	 -72, -67, -71
137d66a5066SPeter Wemm };
138d66a5066SPeter Wemm 
139956d3333SMarcel Moolenaar int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
140956d3333SMarcel Moolenaar 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
141956d3333SMarcel Moolenaar 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
142ba873f4cSAlexander Kabaev 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
143956d3333SMarcel Moolenaar 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
144956d3333SMarcel Moolenaar 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
145956d3333SMarcel Moolenaar 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
146956d3333SMarcel Moolenaar 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
147956d3333SMarcel Moolenaar 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
148d66a5066SPeter Wemm };
149d66a5066SPeter Wemm 
150956d3333SMarcel Moolenaar int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
151956d3333SMarcel Moolenaar 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
152956d3333SMarcel Moolenaar 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
153956d3333SMarcel Moolenaar 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
154956d3333SMarcel Moolenaar 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
155956d3333SMarcel Moolenaar 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
156956d3333SMarcel Moolenaar 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
157956d3333SMarcel Moolenaar 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
158ba873f4cSAlexander Kabaev 	SIGIO, SIGURG, SIGSYS
159d66a5066SPeter Wemm };
160d66a5066SPeter Wemm 
/* Linux trap number reported when a FreeBSD trap code has no mapping. */
#define	LINUX_T_UNKNOWN	255

/*
 * Linux trap numbers, indexed by FreeBSD trap code.  Codes without an
 * entry of their own hold LINUX_T_UNKNOWN.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a FreeBSD trap code into the Linux trap number.
 *
 * Returns the table entry for `code', or LINUX_T_UNKNOWN when `code' is
 * negative or lies past the end of the table.
 *
 * This is a function rather than a macro so that the argument is
 * evaluated exactly once and the range check no longer depends on an
 * implicit signed-to-unsigned conversion.
 */
static __inline int
bsd_to_linux_trapcode(int code)
{
	const size_t ntraps = sizeof(_bsd_to_linux_trapcode) /
	    sizeof(_bsd_to_linux_trapcode[0]);

	if (code < 0 || (size_t)code >= ntraps)
		return (LINUX_T_UNKNOWN);
	return (_bsd_to_linux_trapcode[code]);
}
19927a828fcSPierre Beyssac 
200288078beSEivind Eklund /*
201288078beSEivind Eklund  * If FreeBSD & Linux have a difference of opinion about what a trap
202288078beSEivind Eklund  * means, deal with it here.
203356861dbSMatthew Dillon  *
204356861dbSMatthew Dillon  * MPSAFE
205288078beSEivind Eklund  */
206288078beSEivind Eklund static int
207288078beSEivind Eklund translate_traps(int signal, int trap_code)
208288078beSEivind Eklund {
209d563a53aSEivind Eklund 	if (signal != SIGBUS)
210d563a53aSEivind Eklund 		return signal;
211288078beSEivind Eklund 	switch (trap_code) {
212288078beSEivind Eklund 	case T_PROTFLT:
213288078beSEivind Eklund 	case T_TSSFLT:
214288078beSEivind Eklund 	case T_DOUBLEFLT:
215288078beSEivind Eklund 	case T_PAGEFLT:
216288078beSEivind Eklund 		return SIGSEGV;
217288078beSEivind Eklund 	default:
218288078beSEivind Eklund 		return signal;
219288078beSEivind Eklund 	}
220288078beSEivind Eklund }
221288078beSEivind Eklund 
222303b270bSEivind Eklund static int
223654f6be1SBruce Evans linux_fixup(register_t **stack_base, struct image_params *imgp)
224d66a5066SPeter Wemm {
225654f6be1SBruce Evans 	register_t *argv, *envp;
226d66a5066SPeter Wemm 
227d66a5066SPeter Wemm 	argv = *stack_base;
228610ecfe0SMaxim Sobolev 	envp = *stack_base + (imgp->args->argc + 1);
229d66a5066SPeter Wemm 	(*stack_base)--;
23086a14a7aSBruce Evans 	**stack_base = (intptr_t)(void *)envp;
231d66a5066SPeter Wemm 	(*stack_base)--;
23286a14a7aSBruce Evans 	**stack_base = (intptr_t)(void *)argv;
233d66a5066SPeter Wemm 	(*stack_base)--;
234610ecfe0SMaxim Sobolev 	**stack_base = imgp->args->argc;
2354d7c2e8aSDmitry Chagin 	return (0);
236d66a5066SPeter Wemm }
237d66a5066SPeter Wemm 
238303b270bSEivind Eklund static int
239654f6be1SBruce Evans elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
240e1743d02SSøren Schmidt {
2414d7c2e8aSDmitry Chagin 	struct proc *p;
24243cf129cSJohn Baldwin 	Elf32_Auxargs *args;
2434d7c2e8aSDmitry Chagin 	Elf32_Addr *uplatform;
2444d7c2e8aSDmitry Chagin 	struct ps_strings *arginfo;
245654f6be1SBruce Evans 	register_t *pos;
246d66a5066SPeter Wemm 
2476617724cSJeff Roberson 	KASSERT(curthread->td_proc == imgp->proc,
24843cf129cSJohn Baldwin 	    ("unsafe elf_linux_fixup(), should be curproc"));
2494d7c2e8aSDmitry Chagin 
2504d7c2e8aSDmitry Chagin 	p = imgp->proc;
2514d7c2e8aSDmitry Chagin 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
2524d7c2e8aSDmitry Chagin 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
2534d7c2e8aSDmitry Chagin 	    linux_szplatform);
25443cf129cSJohn Baldwin 	args = (Elf32_Auxargs *)imgp->auxargs;
255610ecfe0SMaxim Sobolev 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
256e1743d02SSøren Schmidt 
2574d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
2588d30f381SDmitry Chagin 
2598d30f381SDmitry Chagin 	/*
2608d30f381SDmitry Chagin 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
2618d30f381SDmitry Chagin 	 * as it has appeared in the 2.4.0-rc7 first time.
2628d30f381SDmitry Chagin 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
2638d30f381SDmitry Chagin 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
2648d30f381SDmitry Chagin 	 * is not present.
2658d30f381SDmitry Chagin 	 * Also see linux_times() implementation.
2668d30f381SDmitry Chagin 	 */
2678d30f381SDmitry Chagin 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
2681ca16454SDmitry Chagin 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
269e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
270e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
271e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
272e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
273e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
274e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
275e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
2764d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
277b1fc0ec1SRobert Watson 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
278b1fc0ec1SRobert Watson 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
279b1fc0ec1SRobert Watson 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
280b1fc0ec1SRobert Watson 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
2814d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
2824d7c2e8aSDmitry Chagin 	if (args->execfd != -1)
2834d7c2e8aSDmitry Chagin 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
284e1743d02SSøren Schmidt 	AUXARGS_ENTRY(pos, AT_NULL, 0);
285e1743d02SSøren Schmidt 
286e1743d02SSøren Schmidt 	free(imgp->auxargs, M_TEMP);
287e1743d02SSøren Schmidt 	imgp->auxargs = NULL;
288e1743d02SSøren Schmidt 
289e1743d02SSøren Schmidt 	(*stack_base)--;
290610ecfe0SMaxim Sobolev 	**stack_base = (register_t)imgp->args->argc;
2914d7c2e8aSDmitry Chagin 	return (0);
292e1743d02SSøren Schmidt }
293d66a5066SPeter Wemm 
2944d7c2e8aSDmitry Chagin /*
2954d7c2e8aSDmitry Chagin  * Copied from kern/kern_exec.c
2964d7c2e8aSDmitry Chagin  */
2974d7c2e8aSDmitry Chagin static register_t *
2984d7c2e8aSDmitry Chagin linux_copyout_strings(struct image_params *imgp)
2994d7c2e8aSDmitry Chagin {
3004d7c2e8aSDmitry Chagin 	int argc, envc;
3014d7c2e8aSDmitry Chagin 	char **vectp;
3024d7c2e8aSDmitry Chagin 	char *stringp, *destp;
3034d7c2e8aSDmitry Chagin 	register_t *stack_base;
3044d7c2e8aSDmitry Chagin 	struct ps_strings *arginfo;
3054d7c2e8aSDmitry Chagin 	struct proc *p;
3064d7c2e8aSDmitry Chagin 
3074d7c2e8aSDmitry Chagin 	/*
3084d7c2e8aSDmitry Chagin 	 * Calculate string base and vector table pointers.
3094d7c2e8aSDmitry Chagin 	 * Also deal with signal trampoline code for this exec type.
3104d7c2e8aSDmitry Chagin 	 */
3114d7c2e8aSDmitry Chagin 	p = imgp->proc;
3124d7c2e8aSDmitry Chagin 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
3134d7c2e8aSDmitry Chagin 	destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
3144d7c2e8aSDmitry Chagin 	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
3154d7c2e8aSDmitry Chagin 	    sizeof(char *));
3164d7c2e8aSDmitry Chagin 
3174d7c2e8aSDmitry Chagin 	/*
3184d7c2e8aSDmitry Chagin 	 * install sigcode
3194d7c2e8aSDmitry Chagin 	 */
3204d7c2e8aSDmitry Chagin 	copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
3214d7c2e8aSDmitry Chagin 	    linux_szsigcode), linux_szsigcode);
3224d7c2e8aSDmitry Chagin 
3234d7c2e8aSDmitry Chagin 	/*
3244d7c2e8aSDmitry Chagin 	 * install LINUX_PLATFORM
3254d7c2e8aSDmitry Chagin 	 */
3264d7c2e8aSDmitry Chagin 	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
3274d7c2e8aSDmitry Chagin 	    linux_szplatform), linux_szplatform);
3284d7c2e8aSDmitry Chagin 
3294d7c2e8aSDmitry Chagin 	/*
3304d7c2e8aSDmitry Chagin 	 * If we have a valid auxargs ptr, prepare some room
3314d7c2e8aSDmitry Chagin 	 * on the stack.
3324d7c2e8aSDmitry Chagin 	 */
3334d7c2e8aSDmitry Chagin 	if (imgp->auxargs) {
3344d7c2e8aSDmitry Chagin 		/*
3354d7c2e8aSDmitry Chagin 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
3364d7c2e8aSDmitry Chagin 		 * lower compatibility.
3374d7c2e8aSDmitry Chagin 		 */
3384d7c2e8aSDmitry Chagin 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
3394d7c2e8aSDmitry Chagin 		    (LINUX_AT_COUNT * 2);
3404d7c2e8aSDmitry Chagin 		/*
3414d7c2e8aSDmitry Chagin 		 * The '+ 2' is for the null pointers at the end of each of
3424d7c2e8aSDmitry Chagin 		 * the arg and env vector sets,and imgp->auxarg_size is room
3434d7c2e8aSDmitry Chagin 		 * for argument of Runtime loader.
3444d7c2e8aSDmitry Chagin 		 */
3454d7c2e8aSDmitry Chagin 		vectp = (char **)(destp - (imgp->args->argc +
3464d7c2e8aSDmitry Chagin 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
3474d7c2e8aSDmitry Chagin 	} else {
3484d7c2e8aSDmitry Chagin 		/*
3494d7c2e8aSDmitry Chagin 		 * The '+ 2' is for the null pointers at the end of each of
3504d7c2e8aSDmitry Chagin 		 * the arg and env vector sets
3514d7c2e8aSDmitry Chagin 		 */
3524d7c2e8aSDmitry Chagin 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
3534d7c2e8aSDmitry Chagin 		    sizeof(char *));
3544d7c2e8aSDmitry Chagin 	}
3554d7c2e8aSDmitry Chagin 
3564d7c2e8aSDmitry Chagin 	/*
3574d7c2e8aSDmitry Chagin 	 * vectp also becomes our initial stack base
3584d7c2e8aSDmitry Chagin 	 */
3594d7c2e8aSDmitry Chagin 	stack_base = (register_t *)vectp;
3604d7c2e8aSDmitry Chagin 
3614d7c2e8aSDmitry Chagin 	stringp = imgp->args->begin_argv;
3624d7c2e8aSDmitry Chagin 	argc = imgp->args->argc;
3634d7c2e8aSDmitry Chagin 	envc = imgp->args->envc;
3644d7c2e8aSDmitry Chagin 
3654d7c2e8aSDmitry Chagin 	/*
3664d7c2e8aSDmitry Chagin 	 * Copy out strings - arguments and environment.
3674d7c2e8aSDmitry Chagin 	 */
3684d7c2e8aSDmitry Chagin 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
3694d7c2e8aSDmitry Chagin 
3704d7c2e8aSDmitry Chagin 	/*
3714d7c2e8aSDmitry Chagin 	 * Fill in "ps_strings" struct for ps, w, etc.
3724d7c2e8aSDmitry Chagin 	 */
3734d7c2e8aSDmitry Chagin 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
3744d7c2e8aSDmitry Chagin 	suword(&arginfo->ps_nargvstr, argc);
3754d7c2e8aSDmitry Chagin 
3764d7c2e8aSDmitry Chagin 	/*
3774d7c2e8aSDmitry Chagin 	 * Fill in argument portion of vector table.
3784d7c2e8aSDmitry Chagin 	 */
3794d7c2e8aSDmitry Chagin 	for (; argc > 0; --argc) {
3804d7c2e8aSDmitry Chagin 		suword(vectp++, (long)(intptr_t)destp);
3814d7c2e8aSDmitry Chagin 		while (*stringp++ != 0)
3824d7c2e8aSDmitry Chagin 			destp++;
3834d7c2e8aSDmitry Chagin 		destp++;
3844d7c2e8aSDmitry Chagin 	}
3854d7c2e8aSDmitry Chagin 
3864d7c2e8aSDmitry Chagin 	/* a null vector table pointer separates the argp's from the envp's */
3874d7c2e8aSDmitry Chagin 	suword(vectp++, 0);
3884d7c2e8aSDmitry Chagin 
3894d7c2e8aSDmitry Chagin 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
3904d7c2e8aSDmitry Chagin 	suword(&arginfo->ps_nenvstr, envc);
3914d7c2e8aSDmitry Chagin 
3924d7c2e8aSDmitry Chagin 	/*
3934d7c2e8aSDmitry Chagin 	 * Fill in environment portion of vector table.
3944d7c2e8aSDmitry Chagin 	 */
3954d7c2e8aSDmitry Chagin 	for (; envc > 0; --envc) {
3964d7c2e8aSDmitry Chagin 		suword(vectp++, (long)(intptr_t)destp);
3974d7c2e8aSDmitry Chagin 		while (*stringp++ != 0)
3984d7c2e8aSDmitry Chagin 			destp++;
3994d7c2e8aSDmitry Chagin 		destp++;
4004d7c2e8aSDmitry Chagin 	}
4014d7c2e8aSDmitry Chagin 
4024d7c2e8aSDmitry Chagin 	/* end of vector table is a null pointer */
4034d7c2e8aSDmitry Chagin 	suword(vectp, 0);
4044d7c2e8aSDmitry Chagin 
4054d7c2e8aSDmitry Chagin 	return (stack_base);
4064d7c2e8aSDmitry Chagin }
4074d7c2e8aSDmitry Chagin 
4084d7c2e8aSDmitry Chagin 
4094d7c2e8aSDmitry Chagin 
410d66a5066SPeter Wemm extern int _ucodesel, _udatasel;
41102318dacSJake Burkholder extern unsigned long linux_sznonrtsigcode;
41279363394SAndrew Gallatin 
41379363394SAndrew Gallatin static void
4149104847fSDavid Xu linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
41579363394SAndrew Gallatin {
4161d062e2bSDag-Erling Smørgrav 	struct thread *td = curthread;
4171d062e2bSDag-Erling Smørgrav 	struct proc *p = td->td_proc;
41890af4afaSJohn Baldwin 	struct sigacts *psp;
4191d062e2bSDag-Erling Smørgrav 	struct trapframe *regs;
4205002a60fSMarcel Moolenaar 	struct l_rt_sigframe *fp, frame;
4219104847fSDavid Xu 	int sig, code;
42279363394SAndrew Gallatin 	int oonstack;
42379363394SAndrew Gallatin 
4249104847fSDavid Xu 	sig = ksi->ksi_signo;
4259104847fSDavid Xu 	code = ksi->ksi_code;
426df53e91cSJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
42790af4afaSJohn Baldwin 	psp = p->p_sigacts;
42890af4afaSJohn Baldwin 	mtx_assert(&psp->ps_mtx, MA_OWNED);
429b40ce416SJulian Elischer 	regs = td->td_frame;
430d034d459SMarcel Moolenaar 	oonstack = sigonstack(regs->tf_esp);
43179363394SAndrew Gallatin 
43279363394SAndrew Gallatin #ifdef DEBUG
4335002a60fSMarcel Moolenaar 	if (ldebug(rt_sendsig))
434728ef954SJohn Baldwin 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
43524593369SJonathan Lemon 		    catcher, sig, (void*)mask, code);
43679363394SAndrew Gallatin #endif
43779363394SAndrew Gallatin 	/*
43879363394SAndrew Gallatin 	 * Allocate space for the signal handler context.
43979363394SAndrew Gallatin 	 */
440a30ec4b9SDavid Xu 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
44190af4afaSJohn Baldwin 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
442a30ec4b9SDavid Xu 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
443a30ec4b9SDavid Xu 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
444d034d459SMarcel Moolenaar 	} else
4455002a60fSMarcel Moolenaar 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
44690af4afaSJohn Baldwin 	mtx_unlock(&psp->ps_mtx);
44779363394SAndrew Gallatin 
44879363394SAndrew Gallatin 	/*
44979363394SAndrew Gallatin 	 * Build the argument list for the signal handler.
45079363394SAndrew Gallatin 	 */
45179363394SAndrew Gallatin 	if (p->p_sysent->sv_sigtbl)
45279363394SAndrew Gallatin 		if (sig <= p->p_sysent->sv_sigsize)
45379363394SAndrew Gallatin 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
45479363394SAndrew Gallatin 
45599d45c5fSMarcel Moolenaar 	bzero(&frame, sizeof(frame));
45699d45c5fSMarcel Moolenaar 
45779363394SAndrew Gallatin 	frame.sf_handler = catcher;
45879363394SAndrew Gallatin 	frame.sf_sig = sig;
45979363394SAndrew Gallatin 	frame.sf_siginfo = &fp->sf_si;
46079363394SAndrew Gallatin 	frame.sf_ucontext = &fp->sf_sc;
461cc6ca9b3SMarcel Moolenaar 
4629d05b77dSJuli Mallett 	/* Fill in POSIX parts */
463aa8b2011SKonstantin Belousov 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
464cc6ca9b3SMarcel Moolenaar 
46579363394SAndrew Gallatin 	/*
46679363394SAndrew Gallatin 	 * Build the signal context to be used by sigreturn.
46779363394SAndrew Gallatin 	 */
468cc6ca9b3SMarcel Moolenaar 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
469cc6ca9b3SMarcel Moolenaar 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
470cc6ca9b3SMarcel Moolenaar 
471a30ec4b9SDavid Xu 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
472a30ec4b9SDavid Xu 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
473a30ec4b9SDavid Xu 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
474d034d459SMarcel Moolenaar 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
475611d9407SJohn Baldwin 	PROC_UNLOCK(p);
476cc6ca9b3SMarcel Moolenaar 
477cc6ca9b3SMarcel Moolenaar 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
478cc6ca9b3SMarcel Moolenaar 
479cc6ca9b3SMarcel Moolenaar 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
48079363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
48179363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
48279363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
48379363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
48479363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
48579363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
48679363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
48779363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
48879363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
48979363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
49079363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
49179363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
49279363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
49379363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
49479363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
49579363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
49679363394SAndrew Gallatin 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
49796a2b635SKonstantin Belousov 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
49827a828fcSPierre Beyssac 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
49979363394SAndrew Gallatin 
50079363394SAndrew Gallatin #ifdef DEBUG
5015002a60fSMarcel Moolenaar 	if (ldebug(rt_sendsig))
50224593369SJonathan Lemon 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
5039b778a16SDavid Xu 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
5049b778a16SDavid Xu 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
50579363394SAndrew Gallatin #endif
50679363394SAndrew Gallatin 
50779363394SAndrew Gallatin 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
50879363394SAndrew Gallatin 		/*
50979363394SAndrew Gallatin 		 * Process has trashed its stack; give it an illegal
51079363394SAndrew Gallatin 		 * instruction to halt it in its tracks.
51179363394SAndrew Gallatin 		 */
51289734883SAlan Cox #ifdef DEBUG
51389734883SAlan Cox 		if (ldebug(rt_sendsig))
51489734883SAlan Cox 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
51589734883SAlan Cox 			    fp, oonstack);
51689734883SAlan Cox #endif
51719eb87d2SJohn Baldwin 		PROC_LOCK(p);
518b40ce416SJulian Elischer 		sigexit(td, SIGILL);
51979363394SAndrew Gallatin 	}
52079363394SAndrew Gallatin 
52179363394SAndrew Gallatin 	/*
52279363394SAndrew Gallatin 	 * Build context to run handler in.
52379363394SAndrew Gallatin 	 */
52479363394SAndrew Gallatin 	regs->tf_esp = (int)fp;
52579363394SAndrew Gallatin 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
52602318dacSJake Burkholder 	    linux_sznonrtsigcode;
52722eca0bfSKonstantin Belousov 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
52879363394SAndrew Gallatin 	regs->tf_cs = _ucodesel;
52979363394SAndrew Gallatin 	regs->tf_ds = _udatasel;
53079363394SAndrew Gallatin 	regs->tf_es = _udatasel;
53179363394SAndrew Gallatin 	regs->tf_fs = _udatasel;
53279363394SAndrew Gallatin 	regs->tf_ss = _udatasel;
533df53e91cSJohn Baldwin 	PROC_LOCK(p);
53490af4afaSJohn Baldwin 	mtx_lock(&psp->ps_mtx);
53579363394SAndrew Gallatin }
53679363394SAndrew Gallatin 
537d66a5066SPeter Wemm 
538d66a5066SPeter Wemm /*
539d66a5066SPeter Wemm  * Send an interrupt to process.
540d66a5066SPeter Wemm  *
541d66a5066SPeter Wemm  * Stack is set up to allow sigcode stored
542d66a5066SPeter Wemm  * in u. to call routine, followed by kcall
543d66a5066SPeter Wemm  * to sigreturn routine below.  After sigreturn
544d66a5066SPeter Wemm  * resets the signal mask, the stack, and the
545d66a5066SPeter Wemm  * frame pointer, it returns to the user
546d66a5066SPeter Wemm  * specified pc, psl.
547d66a5066SPeter Wemm  */
548303b270bSEivind Eklund static void
5499104847fSDavid Xu linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
550d66a5066SPeter Wemm {
5511d062e2bSDag-Erling Smørgrav 	struct thread *td = curthread;
5521d062e2bSDag-Erling Smørgrav 	struct proc *p = td->td_proc;
55390af4afaSJohn Baldwin 	struct sigacts *psp;
5541d062e2bSDag-Erling Smørgrav 	struct trapframe *regs;
5555002a60fSMarcel Moolenaar 	struct l_sigframe *fp, frame;
5565002a60fSMarcel Moolenaar 	l_sigset_t lmask;
5579104847fSDavid Xu 	int sig, code;
5582c4ab9ddSAndrew Gallatin 	int oonstack, i;
559d66a5066SPeter Wemm 
5602509e6c2SJohn Baldwin 	PROC_LOCK_ASSERT(p, MA_OWNED);
56190af4afaSJohn Baldwin 	psp = p->p_sigacts;
5629104847fSDavid Xu 	sig = ksi->ksi_signo;
5639104847fSDavid Xu 	code = ksi->ksi_code;
56490af4afaSJohn Baldwin 	mtx_assert(&psp->ps_mtx, MA_OWNED);
56590af4afaSJohn Baldwin 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
566cc6ca9b3SMarcel Moolenaar 		/* Signal handler installed with SA_SIGINFO. */
5679104847fSDavid Xu 		linux_rt_sendsig(catcher, ksi, mask);
568cc6ca9b3SMarcel Moolenaar 		return;
569cc6ca9b3SMarcel Moolenaar 	}
570b40ce416SJulian Elischer 	regs = td->td_frame;
571d034d459SMarcel Moolenaar 	oonstack = sigonstack(regs->tf_esp);
572d66a5066SPeter Wemm 
573d66a5066SPeter Wemm #ifdef DEBUG
5745002a60fSMarcel Moolenaar 	if (ldebug(sendsig))
575728ef954SJohn Baldwin 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
57624593369SJonathan Lemon 		    catcher, sig, (void*)mask, code);
577d66a5066SPeter Wemm #endif
57879363394SAndrew Gallatin 
579d66a5066SPeter Wemm 	/*
580d66a5066SPeter Wemm 	 * Allocate space for the signal handler context.
581d66a5066SPeter Wemm 	 */
582a30ec4b9SDavid Xu 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
58390af4afaSJohn Baldwin 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
584a30ec4b9SDavid Xu 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
585a30ec4b9SDavid Xu 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
586d034d459SMarcel Moolenaar 	} else
5875002a60fSMarcel Moolenaar 		fp = (struct l_sigframe *)regs->tf_esp - 1;
58890af4afaSJohn Baldwin 	mtx_unlock(&psp->ps_mtx);
589611d9407SJohn Baldwin 	PROC_UNLOCK(p);
590d66a5066SPeter Wemm 
591d66a5066SPeter Wemm 	/*
592d66a5066SPeter Wemm 	 * Build the argument list for the signal handler.
593d66a5066SPeter Wemm 	 */
594956d3333SMarcel Moolenaar 	if (p->p_sysent->sv_sigtbl)
595956d3333SMarcel Moolenaar 		if (sig <= p->p_sysent->sv_sigsize)
596956d3333SMarcel Moolenaar 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
597d66a5066SPeter Wemm 
59899d45c5fSMarcel Moolenaar 	bzero(&frame, sizeof(frame));
59999d45c5fSMarcel Moolenaar 
600d66a5066SPeter Wemm 	frame.sf_handler = catcher;
601d66a5066SPeter Wemm 	frame.sf_sig = sig;
602d66a5066SPeter Wemm 
603cc6ca9b3SMarcel Moolenaar 	bsd_to_linux_sigset(mask, &lmask);
604cc6ca9b3SMarcel Moolenaar 
605d66a5066SPeter Wemm 	/*
606d66a5066SPeter Wemm 	 * Build the signal context to be used by sigreturn.
607d66a5066SPeter Wemm 	 */
608cc6ca9b3SMarcel Moolenaar 	frame.sf_sc.sc_mask   = lmask.__bits[0];
6095206bca1SLuoqi Chen 	frame.sf_sc.sc_gs     = rgs();
6105206bca1SLuoqi Chen 	frame.sf_sc.sc_fs     = regs->tf_fs;
611213fdd80SPeter Wemm 	frame.sf_sc.sc_es     = regs->tf_es;
612213fdd80SPeter Wemm 	frame.sf_sc.sc_ds     = regs->tf_ds;
613213fdd80SPeter Wemm 	frame.sf_sc.sc_edi    = regs->tf_edi;
614213fdd80SPeter Wemm 	frame.sf_sc.sc_esi    = regs->tf_esi;
615213fdd80SPeter Wemm 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
616213fdd80SPeter Wemm 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
617213fdd80SPeter Wemm 	frame.sf_sc.sc_edx    = regs->tf_edx;
618213fdd80SPeter Wemm 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
619213fdd80SPeter Wemm 	frame.sf_sc.sc_eax    = regs->tf_eax;
620213fdd80SPeter Wemm 	frame.sf_sc.sc_eip    = regs->tf_eip;
621213fdd80SPeter Wemm 	frame.sf_sc.sc_cs     = regs->tf_cs;
622213fdd80SPeter Wemm 	frame.sf_sc.sc_eflags = regs->tf_eflags;
623213fdd80SPeter Wemm 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
624213fdd80SPeter Wemm 	frame.sf_sc.sc_ss     = regs->tf_ss;
625213fdd80SPeter Wemm 	frame.sf_sc.sc_err    = regs->tf_err;
62696a2b635SKonstantin Belousov 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
6279104847fSDavid Xu 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
628cc6ca9b3SMarcel Moolenaar 
6292c4ab9ddSAndrew Gallatin 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
630cc6ca9b3SMarcel Moolenaar 		frame.sf_extramask[i] = lmask.__bits[i+1];
631d66a5066SPeter Wemm 
632d66a5066SPeter Wemm 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
633d66a5066SPeter Wemm 		/*
634d66a5066SPeter Wemm 		 * Process has trashed its stack; give it an illegal
635d66a5066SPeter Wemm 		 * instruction to halt it in its tracks.
636d66a5066SPeter Wemm 		 */
63719eb87d2SJohn Baldwin 		PROC_LOCK(p);
638b40ce416SJulian Elischer 		sigexit(td, SIGILL);
639d66a5066SPeter Wemm 	}
640d66a5066SPeter Wemm 
641d66a5066SPeter Wemm 	/*
642d66a5066SPeter Wemm 	 * Build context to run handler in.
643d66a5066SPeter Wemm 	 */
644213fdd80SPeter Wemm 	regs->tf_esp = (int)fp;
6454c56fcdeSBruce Evans 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
64622eca0bfSKonstantin Belousov 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
647213fdd80SPeter Wemm 	regs->tf_cs = _ucodesel;
648213fdd80SPeter Wemm 	regs->tf_ds = _udatasel;
649213fdd80SPeter Wemm 	regs->tf_es = _udatasel;
6505206bca1SLuoqi Chen 	regs->tf_fs = _udatasel;
651213fdd80SPeter Wemm 	regs->tf_ss = _udatasel;
6525002a60fSMarcel Moolenaar 	PROC_LOCK(p);
65390af4afaSJohn Baldwin 	mtx_lock(&psp->ps_mtx);
654d66a5066SPeter Wemm }
655d66a5066SPeter Wemm 
656d66a5066SPeter Wemm /*
657d66a5066SPeter Wemm  * System call to cleanup state after a signal
658d66a5066SPeter Wemm  * has been taken.  Reset signal mask and
659d66a5066SPeter Wemm  * stack state from context left by sendsig (above).
660d66a5066SPeter Wemm  * Return to previous pc and psl as specified by
661d66a5066SPeter Wemm  * context left by sendsig. Check carefully to
662d66a5066SPeter Wemm  * make sure that the user has not modified the
663d66a5066SPeter Wemm  * psl to gain improper privileges or to cause
664d66a5066SPeter Wemm  * a machine fault.
665d66a5066SPeter Wemm  */
666d66a5066SPeter Wemm int
667b07cd97eSMark Murray linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
668d66a5066SPeter Wemm {
669b40ce416SJulian Elischer 	struct proc *p = td->td_proc;
6705002a60fSMarcel Moolenaar 	struct l_sigframe frame;
6711d062e2bSDag-Erling Smørgrav 	struct trapframe *regs;
6725002a60fSMarcel Moolenaar 	l_sigset_t lmask;
6732c4ab9ddSAndrew Gallatin 	int eflags, i;
6749104847fSDavid Xu 	ksiginfo_t ksi;
675d66a5066SPeter Wemm 
676b40ce416SJulian Elischer 	regs = td->td_frame;
677d66a5066SPeter Wemm 
678d66a5066SPeter Wemm #ifdef DEBUG
67924593369SJonathan Lemon 	if (ldebug(sigreturn))
68024593369SJonathan Lemon 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
681d66a5066SPeter Wemm #endif
682d66a5066SPeter Wemm 	/*
683cc6ca9b3SMarcel Moolenaar 	 * The trampoline code hands us the sigframe.
684d66a5066SPeter Wemm 	 * It is unsafe to keep track of it ourselves, in the event that a
685d66a5066SPeter Wemm 	 * program jumps out of a signal handler.
686d66a5066SPeter Wemm 	 */
6874b7ef73dSDag-Erling Smørgrav 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
688d66a5066SPeter Wemm 		return (EFAULT);
689d66a5066SPeter Wemm 
690d66a5066SPeter Wemm 	/*
691d66a5066SPeter Wemm 	 * Check for security violations.
692d66a5066SPeter Wemm 	 */
693d66a5066SPeter Wemm #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
694cc6ca9b3SMarcel Moolenaar 	eflags = frame.sf_sc.sc_eflags;
695d66a5066SPeter Wemm 	/*
696d66a5066SPeter Wemm 	 * XXX do allow users to change the privileged flag PSL_RF.  The
697d66a5066SPeter Wemm 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
698d66a5066SPeter Wemm 	 * sometimes set it there too.  tf_eflags is kept in the signal
699d66a5066SPeter Wemm 	 * context during signal handling and there is no other place
700d66a5066SPeter Wemm 	 * to remember it, so the PSL_RF bit may be corrupted by the
701d66a5066SPeter Wemm 	 * signal handler without us knowing.  Corruption of the PSL_RF
702d66a5066SPeter Wemm 	 * bit at worst causes one more or one less debugger trap, so
703d66a5066SPeter Wemm 	 * allowing it is fairly harmless.
704d66a5066SPeter Wemm 	 */
705b07cd97eSMark Murray 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
706d66a5066SPeter Wemm 		return(EINVAL);
707d66a5066SPeter Wemm 
708d66a5066SPeter Wemm 	/*
709d66a5066SPeter Wemm 	 * Don't allow users to load a valid privileged %cs.  Let the
710d66a5066SPeter Wemm 	 * hardware check for invalid selectors, excess privilege in
711d66a5066SPeter Wemm 	 * other selectors, invalid %eip's and invalid %esp's.
712d66a5066SPeter Wemm 	 */
71340d50994SPhilippe Charnier #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
714cc6ca9b3SMarcel Moolenaar 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
7159104847fSDavid Xu 		ksiginfo_init_trap(&ksi);
7169104847fSDavid Xu 		ksi.ksi_signo = SIGBUS;
7179104847fSDavid Xu 		ksi.ksi_code = BUS_OBJERR;
7189104847fSDavid Xu 		ksi.ksi_trapno = T_PROTFLT;
7199104847fSDavid Xu 		ksi.ksi_addr = (void *)regs->tf_eip;
7209104847fSDavid Xu 		trapsignal(td, &ksi);
721d66a5066SPeter Wemm 		return(EINVAL);
722d66a5066SPeter Wemm 	}
723d66a5066SPeter Wemm 
724cc6ca9b3SMarcel Moolenaar 	lmask.__bits[0] = frame.sf_sc.sc_mask;
7252c4ab9ddSAndrew Gallatin 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
726cc6ca9b3SMarcel Moolenaar 		lmask.__bits[i+1] = frame.sf_extramask[i];
727611d9407SJohn Baldwin 	PROC_LOCK(p);
7284093529dSJeff Roberson 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
7294093529dSJeff Roberson 	SIG_CANTMASK(td->td_sigmask);
7304093529dSJeff Roberson 	signotify(td);
731611d9407SJohn Baldwin 	PROC_UNLOCK(p);
732956d3333SMarcel Moolenaar 
733d66a5066SPeter Wemm 	/*
734d66a5066SPeter Wemm 	 * Restore signal context.
735d66a5066SPeter Wemm 	 */
7365206bca1SLuoqi Chen 	/* %gs was restored by the trampoline. */
737cc6ca9b3SMarcel Moolenaar 	regs->tf_fs     = frame.sf_sc.sc_fs;
738cc6ca9b3SMarcel Moolenaar 	regs->tf_es     = frame.sf_sc.sc_es;
739cc6ca9b3SMarcel Moolenaar 	regs->tf_ds     = frame.sf_sc.sc_ds;
740cc6ca9b3SMarcel Moolenaar 	regs->tf_edi    = frame.sf_sc.sc_edi;
741cc6ca9b3SMarcel Moolenaar 	regs->tf_esi    = frame.sf_sc.sc_esi;
742cc6ca9b3SMarcel Moolenaar 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
743cc6ca9b3SMarcel Moolenaar 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
744cc6ca9b3SMarcel Moolenaar 	regs->tf_edx    = frame.sf_sc.sc_edx;
745cc6ca9b3SMarcel Moolenaar 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
746cc6ca9b3SMarcel Moolenaar 	regs->tf_eax    = frame.sf_sc.sc_eax;
747cc6ca9b3SMarcel Moolenaar 	regs->tf_eip    = frame.sf_sc.sc_eip;
748cc6ca9b3SMarcel Moolenaar 	regs->tf_cs     = frame.sf_sc.sc_cs;
749213fdd80SPeter Wemm 	regs->tf_eflags = eflags;
750cc6ca9b3SMarcel Moolenaar 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
751cc6ca9b3SMarcel Moolenaar 	regs->tf_ss     = frame.sf_sc.sc_ss;
752d66a5066SPeter Wemm 
753d66a5066SPeter Wemm 	return (EJUSTRETURN);
754d66a5066SPeter Wemm }
755d66a5066SPeter Wemm 
75679363394SAndrew Gallatin /*
75779363394SAndrew Gallatin  * System call to cleanup state after a signal
75879363394SAndrew Gallatin  * has been taken.  Reset signal mask and
75979363394SAndrew Gallatin  * stack state from context left by rt_sendsig (above).
76079363394SAndrew Gallatin  * Return to previous pc and psl as specified by
76179363394SAndrew Gallatin  * context left by sendsig. Check carefully to
76279363394SAndrew Gallatin  * make sure that the user has not modified the
76379363394SAndrew Gallatin  * psl to gain improper privileges or to cause
76479363394SAndrew Gallatin  * a machine fault.
76579363394SAndrew Gallatin  */
76679363394SAndrew Gallatin int
767b07cd97eSMark Murray linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
76879363394SAndrew Gallatin {
769b40ce416SJulian Elischer 	struct proc *p = td->td_proc;
7705002a60fSMarcel Moolenaar 	struct l_ucontext uc;
7715002a60fSMarcel Moolenaar 	struct l_sigcontext *context;
7725002a60fSMarcel Moolenaar 	l_stack_t *lss;
773206a5d3aSIan Dowse 	stack_t ss;
7741d062e2bSDag-Erling Smørgrav 	struct trapframe *regs;
77579363394SAndrew Gallatin 	int eflags;
7769104847fSDavid Xu 	ksiginfo_t ksi;
77779363394SAndrew Gallatin 
778b40ce416SJulian Elischer 	regs = td->td_frame;
77979363394SAndrew Gallatin 
78079363394SAndrew Gallatin #ifdef DEBUG
78124593369SJonathan Lemon 	if (ldebug(rt_sigreturn))
78224593369SJonathan Lemon 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
78379363394SAndrew Gallatin #endif
78479363394SAndrew Gallatin 	/*
785cc6ca9b3SMarcel Moolenaar 	 * The trampoline code hands us the ucontext.
78679363394SAndrew Gallatin 	 * It is unsafe to keep track of it ourselves, in the event that a
78779363394SAndrew Gallatin 	 * program jumps out of a signal handler.
78879363394SAndrew Gallatin 	 */
7894b7ef73dSDag-Erling Smørgrav 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
79079363394SAndrew Gallatin 		return (EFAULT);
79179363394SAndrew Gallatin 
79279363394SAndrew Gallatin 	context = &uc.uc_mcontext;
79379363394SAndrew Gallatin 
79479363394SAndrew Gallatin 	/*
79579363394SAndrew Gallatin 	 * Check for security violations.
79679363394SAndrew Gallatin 	 */
79779363394SAndrew Gallatin #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
79879363394SAndrew Gallatin 	eflags = context->sc_eflags;
79979363394SAndrew Gallatin 	/*
80079363394SAndrew Gallatin 	 * XXX do allow users to change the privileged flag PSL_RF.  The
80179363394SAndrew Gallatin 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
80279363394SAndrew Gallatin 	 * sometimes set it there too.  tf_eflags is kept in the signal
80379363394SAndrew Gallatin 	 * context during signal handling and there is no other place
80479363394SAndrew Gallatin 	 * to remember it, so the PSL_RF bit may be corrupted by the
80579363394SAndrew Gallatin 	 * signal handler without us knowing.  Corruption of the PSL_RF
80679363394SAndrew Gallatin 	 * bit at worst causes one more or one less debugger trap, so
80779363394SAndrew Gallatin 	 * allowing it is fairly harmless.
80879363394SAndrew Gallatin 	 */
809b07cd97eSMark Murray 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
81079363394SAndrew Gallatin 		return(EINVAL);
81179363394SAndrew Gallatin 
81279363394SAndrew Gallatin 	/*
81379363394SAndrew Gallatin 	 * Don't allow users to load a valid privileged %cs.  Let the
81479363394SAndrew Gallatin 	 * hardware check for invalid selectors, excess privilege in
81579363394SAndrew Gallatin 	 * other selectors, invalid %eip's and invalid %esp's.
81679363394SAndrew Gallatin 	 */
81779363394SAndrew Gallatin #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
81879363394SAndrew Gallatin 	if (!CS_SECURE(context->sc_cs)) {
8199104847fSDavid Xu 		ksiginfo_init_trap(&ksi);
8209104847fSDavid Xu 		ksi.ksi_signo = SIGBUS;
8219104847fSDavid Xu 		ksi.ksi_code = BUS_OBJERR;
8229104847fSDavid Xu 		ksi.ksi_trapno = T_PROTFLT;
8239104847fSDavid Xu 		ksi.ksi_addr = (void *)regs->tf_eip;
8249104847fSDavid Xu 		trapsignal(td, &ksi);
82579363394SAndrew Gallatin 		return(EINVAL);
82679363394SAndrew Gallatin 	}
82779363394SAndrew Gallatin 
828611d9407SJohn Baldwin 	PROC_LOCK(p);
8294093529dSJeff Roberson 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
8304093529dSJeff Roberson 	SIG_CANTMASK(td->td_sigmask);
8314093529dSJeff Roberson 	signotify(td);
832611d9407SJohn Baldwin 	PROC_UNLOCK(p);
83379363394SAndrew Gallatin 
83479363394SAndrew Gallatin 	/*
835cc6ca9b3SMarcel Moolenaar 	 * Restore signal context
83679363394SAndrew Gallatin 	 */
83779363394SAndrew Gallatin 	/* %gs was restored by the trampoline. */
83879363394SAndrew Gallatin 	regs->tf_fs     = context->sc_fs;
83979363394SAndrew Gallatin 	regs->tf_es     = context->sc_es;
84079363394SAndrew Gallatin 	regs->tf_ds     = context->sc_ds;
84179363394SAndrew Gallatin 	regs->tf_edi    = context->sc_edi;
84279363394SAndrew Gallatin 	regs->tf_esi    = context->sc_esi;
84379363394SAndrew Gallatin 	regs->tf_ebp    = context->sc_ebp;
84479363394SAndrew Gallatin 	regs->tf_ebx    = context->sc_ebx;
84579363394SAndrew Gallatin 	regs->tf_edx    = context->sc_edx;
84679363394SAndrew Gallatin 	regs->tf_ecx    = context->sc_ecx;
84779363394SAndrew Gallatin 	regs->tf_eax    = context->sc_eax;
84879363394SAndrew Gallatin 	regs->tf_eip    = context->sc_eip;
84979363394SAndrew Gallatin 	regs->tf_cs     = context->sc_cs;
85079363394SAndrew Gallatin 	regs->tf_eflags = eflags;
85179363394SAndrew Gallatin 	regs->tf_esp    = context->sc_esp_at_signal;
85279363394SAndrew Gallatin 	regs->tf_ss     = context->sc_ss;
85379363394SAndrew Gallatin 
85479363394SAndrew Gallatin 	/*
85579363394SAndrew Gallatin 	 * call sigaltstack & ignore results..
85679363394SAndrew Gallatin 	 */
85779363394SAndrew Gallatin 	lss = &uc.uc_stack;
858206a5d3aSIan Dowse 	ss.ss_sp = lss->ss_sp;
859206a5d3aSIan Dowse 	ss.ss_size = lss->ss_size;
860206a5d3aSIan Dowse 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
86179363394SAndrew Gallatin 
86279363394SAndrew Gallatin #ifdef DEBUG
86324593369SJonathan Lemon 	if (ldebug(rt_sigreturn))
86424593369SJonathan Lemon 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
865206a5d3aSIan Dowse 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
86679363394SAndrew Gallatin #endif
867206a5d3aSIan Dowse 	(void)kern_sigaltstack(td, &ss, NULL);
86879363394SAndrew Gallatin 
86979363394SAndrew Gallatin 	return (EJUSTRETURN);
87079363394SAndrew Gallatin }
87179363394SAndrew Gallatin 
872356861dbSMatthew Dillon /*
873356861dbSMatthew Dillon  * MPSAFE
874356861dbSMatthew Dillon  */
875303b270bSEivind Eklund static void
876d66a5066SPeter Wemm linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
877d66a5066SPeter Wemm {
878d66a5066SPeter Wemm 	args[0] = tf->tf_ebx;
879d66a5066SPeter Wemm 	args[1] = tf->tf_ecx;
880d66a5066SPeter Wemm 	args[2] = tf->tf_edx;
881d66a5066SPeter Wemm 	args[3] = tf->tf_esi;
882d66a5066SPeter Wemm 	args[4] = tf->tf_edi;
8837646aefcSPeter Wemm 	args[5] = tf->tf_ebp;	/* Unconfirmed */
884d66a5066SPeter Wemm 	*params = NULL;		/* no copyin */
885d66a5066SPeter Wemm }
886d66a5066SPeter Wemm 
887d323ddf3SMatthew Dillon /*
888d323ddf3SMatthew Dillon  * If a linux binary is exec'ing something, try this image activator
889d323ddf3SMatthew Dillon  * first.  We override standard shell script execution in order to
890d323ddf3SMatthew Dillon  * be able to modify the interpreter path.  We only do this if a linux
891d323ddf3SMatthew Dillon  * binary is doing the exec, so we do not create an EXEC module for it.
892d323ddf3SMatthew Dillon  */
89389c9a483SAlfred Perlstein static int	exec_linux_imgact_try(struct image_params *iparams);
894d323ddf3SMatthew Dillon 
895d323ddf3SMatthew Dillon static int
896b07cd97eSMark Murray exec_linux_imgact_try(struct image_params *imgp)
897d323ddf3SMatthew Dillon {
898d323ddf3SMatthew Dillon     const char *head = (const char *)imgp->image_header;
8990311233eSJohn Baldwin     char *rpath;
9000311233eSJohn Baldwin     int error = -1, len;
901d323ddf3SMatthew Dillon 
902d323ddf3SMatthew Dillon     /*
903d323ddf3SMatthew Dillon      * The interpreter for shell scripts run from a linux binary needs
904d323ddf3SMatthew Dillon      * to be located in /compat/linux if possible in order to recursively
905d323ddf3SMatthew Dillon      * maintain linux path emulation.
906d323ddf3SMatthew Dillon      */
907d323ddf3SMatthew Dillon     if (((const short *)head)[0] == SHELLMAGIC) {
908d323ddf3SMatthew Dillon 	    /*
909d323ddf3SMatthew Dillon 	     * Run our normal shell image activator.  If it succeeds attempt
910d323ddf3SMatthew Dillon 	     * to use the alternate path for the interpreter.  If an alternate
911d323ddf3SMatthew Dillon 	     * path is found, use our stringspace to store it.
912d323ddf3SMatthew Dillon 	     */
913d323ddf3SMatthew Dillon 	    if ((error = exec_shell_imgact(imgp)) == 0) {
9140311233eSJohn Baldwin 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
91548b05c3fSKonstantin Belousov 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
9160311233eSJohn Baldwin 		    if (rpath != NULL) {
9170311233eSJohn Baldwin 			    len = strlen(rpath) + 1;
918d323ddf3SMatthew Dillon 
919d323ddf3SMatthew Dillon 			    if (len <= MAXSHELLCMDLEN) {
920d323ddf3SMatthew Dillon 				    memcpy(imgp->interpreter_name, rpath, len);
921d323ddf3SMatthew Dillon 			    }
922d323ddf3SMatthew Dillon 			    free(rpath, M_TEMP);
923d323ddf3SMatthew Dillon 		    }
924d323ddf3SMatthew Dillon 	    }
925d323ddf3SMatthew Dillon     }
926d323ddf3SMatthew Dillon     return(error);
927d323ddf3SMatthew Dillon }
928d323ddf3SMatthew Dillon 
929598d45beSMatthew N. Dodd /*
930598d45beSMatthew N. Dodd  * exec_setregs may initialize some registers differently than Linux
931598d45beSMatthew N. Dodd  * does, thus potentially confusing Linux binaries. If necessary, we
932598d45beSMatthew N. Dodd  * override the exec_setregs default(s) here.
933598d45beSMatthew N. Dodd  */
934598d45beSMatthew N. Dodd static void
935598d45beSMatthew N. Dodd exec_linux_setregs(struct thread *td, u_long entry,
936598d45beSMatthew N. Dodd 		   u_long stack, u_long ps_strings)
937598d45beSMatthew N. Dodd {
938598d45beSMatthew N. Dodd 	struct pcb *pcb = td->td_pcb;
939598d45beSMatthew N. Dodd 
940598d45beSMatthew N. Dodd 	exec_setregs(td, entry, stack, ps_strings);
941598d45beSMatthew N. Dodd 
942598d45beSMatthew N. Dodd 	/* Linux sets %gs to 0, we default to _udatasel */
9432ee8325fSJohn Baldwin 	pcb->pcb_gs = 0;
9442ee8325fSJohn Baldwin 	load_gs(0);
9452a51b9b0SDavid Schultz 
9462ee8325fSJohn Baldwin 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
947598d45beSMatthew N. Dodd }
948598d45beSMatthew N. Dodd 
9494d7c2e8aSDmitry Chagin static void
9504d7c2e8aSDmitry Chagin linux_get_machine(const char **dst)
9514d7c2e8aSDmitry Chagin {
9524d7c2e8aSDmitry Chagin 
9534d7c2e8aSDmitry Chagin 	switch (cpu_class) {
9544d7c2e8aSDmitry Chagin 	case CPUCLASS_686:
9554d7c2e8aSDmitry Chagin 		*dst = "i686";
9564d7c2e8aSDmitry Chagin 		break;
9574d7c2e8aSDmitry Chagin 	case CPUCLASS_586:
9584d7c2e8aSDmitry Chagin 		*dst = "i586";
9594d7c2e8aSDmitry Chagin 		break;
9604d7c2e8aSDmitry Chagin 	case CPUCLASS_486:
9614d7c2e8aSDmitry Chagin 		*dst = "i486";
9624d7c2e8aSDmitry Chagin 		break;
9634d7c2e8aSDmitry Chagin 	default:
9644d7c2e8aSDmitry Chagin 		*dst = "i386";
9654d7c2e8aSDmitry Chagin 	}
9664d7c2e8aSDmitry Chagin }
9674d7c2e8aSDmitry Chagin 
968d66a5066SPeter Wemm struct sysentvec linux_sysvec = {
969a8d403e1SKonstantin Belousov 	.sv_size	= LINUX_SYS_MAXSYSCALL,
970a8d403e1SKonstantin Belousov 	.sv_table	= linux_sysent,
971a8d403e1SKonstantin Belousov 	.sv_mask	= 0,
972a8d403e1SKonstantin Belousov 	.sv_sigsize	= LINUX_SIGTBLSZ,
973a8d403e1SKonstantin Belousov 	.sv_sigtbl	= bsd_to_linux_signal,
974a8d403e1SKonstantin Belousov 	.sv_errsize	= ELAST + 1,
975a8d403e1SKonstantin Belousov 	.sv_errtbl	= bsd_to_linux_errno,
976a8d403e1SKonstantin Belousov 	.sv_transtrap	= translate_traps,
977a8d403e1SKonstantin Belousov 	.sv_fixup	= linux_fixup,
978a8d403e1SKonstantin Belousov 	.sv_sendsig	= linux_sendsig,
979a8d403e1SKonstantin Belousov 	.sv_sigcode	= linux_sigcode,
980a8d403e1SKonstantin Belousov 	.sv_szsigcode	= &linux_szsigcode,
981a8d403e1SKonstantin Belousov 	.sv_prepsyscall	= linux_prepsyscall,
982a8d403e1SKonstantin Belousov 	.sv_name	= "Linux a.out",
983a8d403e1SKonstantin Belousov 	.sv_coredump	= NULL,
984a8d403e1SKonstantin Belousov 	.sv_imgact_try	= exec_linux_imgact_try,
985a8d403e1SKonstantin Belousov 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
986a8d403e1SKonstantin Belousov 	.sv_pagesize	= PAGE_SIZE,
987a8d403e1SKonstantin Belousov 	.sv_minuser	= VM_MIN_ADDRESS,
988a8d403e1SKonstantin Belousov 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
989a8d403e1SKonstantin Belousov 	.sv_usrstack	= USRSTACK,
990a8d403e1SKonstantin Belousov 	.sv_psstrings	= PS_STRINGS,
991a8d403e1SKonstantin Belousov 	.sv_stackprot	= VM_PROT_ALL,
992a8d403e1SKonstantin Belousov 	.sv_copyout_strings = exec_copyout_strings,
993a8d403e1SKonstantin Belousov 	.sv_setregs	= exec_linux_setregs,
994a8d403e1SKonstantin Belousov 	.sv_fixlimit	= NULL,
995b4cf0e62SKonstantin Belousov 	.sv_maxssiz	= NULL,
996b4cf0e62SKonstantin Belousov 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32
997d66a5066SPeter Wemm };
998e1743d02SSøren Schmidt 
999e1743d02SSøren Schmidt struct sysentvec elf_linux_sysvec = {
1000a8d403e1SKonstantin Belousov 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1001a8d403e1SKonstantin Belousov 	.sv_table	= linux_sysent,
1002a8d403e1SKonstantin Belousov 	.sv_mask	= 0,
1003a8d403e1SKonstantin Belousov 	.sv_sigsize	= LINUX_SIGTBLSZ,
1004a8d403e1SKonstantin Belousov 	.sv_sigtbl	= bsd_to_linux_signal,
1005a8d403e1SKonstantin Belousov 	.sv_errsize	= ELAST + 1,
1006a8d403e1SKonstantin Belousov 	.sv_errtbl	= bsd_to_linux_errno,
1007a8d403e1SKonstantin Belousov 	.sv_transtrap	= translate_traps,
1008a8d403e1SKonstantin Belousov 	.sv_fixup	= elf_linux_fixup,
1009a8d403e1SKonstantin Belousov 	.sv_sendsig	= linux_sendsig,
1010a8d403e1SKonstantin Belousov 	.sv_sigcode	= linux_sigcode,
1011a8d403e1SKonstantin Belousov 	.sv_szsigcode	= &linux_szsigcode,
1012a8d403e1SKonstantin Belousov 	.sv_prepsyscall	= linux_prepsyscall,
1013a8d403e1SKonstantin Belousov 	.sv_name	= "Linux ELF",
1014a8d403e1SKonstantin Belousov 	.sv_coredump	= elf32_coredump,
1015a8d403e1SKonstantin Belousov 	.sv_imgact_try	= exec_linux_imgact_try,
1016a8d403e1SKonstantin Belousov 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1017a8d403e1SKonstantin Belousov 	.sv_pagesize	= PAGE_SIZE,
1018a8d403e1SKonstantin Belousov 	.sv_minuser	= VM_MIN_ADDRESS,
1019a8d403e1SKonstantin Belousov 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1020a8d403e1SKonstantin Belousov 	.sv_usrstack	= USRSTACK,
1021a8d403e1SKonstantin Belousov 	.sv_psstrings	= PS_STRINGS,
1022a8d403e1SKonstantin Belousov 	.sv_stackprot	= VM_PROT_ALL,
10234d7c2e8aSDmitry Chagin 	.sv_copyout_strings = linux_copyout_strings,
1024a8d403e1SKonstantin Belousov 	.sv_setregs	= exec_linux_setregs,
1025a8d403e1SKonstantin Belousov 	.sv_fixlimit	= NULL,
1026b4cf0e62SKonstantin Belousov 	.sv_maxssiz	= NULL,
1027b4cf0e62SKonstantin Belousov 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32
1028e1743d02SSøren Schmidt };
1029e1743d02SSøren Schmidt 
103032c01de2SDmitry Chagin static char GNULINUX_ABI_VENDOR[] = "GNU";
103132c01de2SDmitry Chagin 
103232c01de2SDmitry Chagin static Elf_Brandnote linux_brandnote = {
103332c01de2SDmitry Chagin 	.hdr.n_namesz	= sizeof(GNULINUX_ABI_VENDOR),
103432c01de2SDmitry Chagin 	.hdr.n_descsz	= 16,
103532c01de2SDmitry Chagin 	.hdr.n_type	= 1,
103632c01de2SDmitry Chagin 	.vendor		= GNULINUX_ABI_VENDOR,
103732c01de2SDmitry Chagin 	.flags		= 0
103832c01de2SDmitry Chagin };
103932c01de2SDmitry Chagin 
1040514058dcSAlexander Langer static Elf32_Brandinfo linux_brand = {
1041a8d403e1SKonstantin Belousov 	.brand		= ELFOSABI_LINUX,
1042a8d403e1SKonstantin Belousov 	.machine	= EM_386,
1043a8d403e1SKonstantin Belousov 	.compat_3_brand	= "Linux",
1044a8d403e1SKonstantin Belousov 	.emul_path	= "/compat/linux",
1045a8d403e1SKonstantin Belousov 	.interp_path	= "/lib/ld-linux.so.1",
1046a8d403e1SKonstantin Belousov 	.sysvec		= &elf_linux_sysvec,
1047a8d403e1SKonstantin Belousov 	.interp_newpath	= NULL,
104832c01de2SDmitry Chagin 	.brand_note	= &linux_brandnote,
1049cd899aadSDmitry Chagin 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
10505cf588ebSPeter Wemm };
10515cf588ebSPeter Wemm 
1052514058dcSAlexander Langer static Elf32_Brandinfo linux_glibc2brand = {
1053a8d403e1SKonstantin Belousov 	.brand		= ELFOSABI_LINUX,
1054a8d403e1SKonstantin Belousov 	.machine	= EM_386,
1055a8d403e1SKonstantin Belousov 	.compat_3_brand	= "Linux",
1056a8d403e1SKonstantin Belousov 	.emul_path	= "/compat/linux",
1057a8d403e1SKonstantin Belousov 	.interp_path	= "/lib/ld-linux.so.2",
1058a8d403e1SKonstantin Belousov 	.sysvec		= &elf_linux_sysvec,
1059a8d403e1SKonstantin Belousov 	.interp_newpath	= NULL,
106032c01de2SDmitry Chagin 	.brand_note	= &linux_brandnote,
1061cd899aadSDmitry Chagin 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
10624e138a28SMike Smith };
10634e138a28SMike Smith 
1064514058dcSAlexander Langer Elf32_Brandinfo *linux_brandlist[] = {
1065514058dcSAlexander Langer 	&linux_brand,
1066514058dcSAlexander Langer 	&linux_glibc2brand,
1067514058dcSAlexander Langer 	NULL
1068514058dcSAlexander Langer };
1069514058dcSAlexander Langer 
1070aa855a59SPeter Wemm static int
1071c25ded31SBruce Evans linux_elf_modevent(module_t mod, int type, void *data)
1072d30ea4f5SPeter Wemm {
1073514058dcSAlexander Langer 	Elf32_Brandinfo **brandinfo;
1074514058dcSAlexander Langer 	int error;
1075f41325dbSPeter Wemm 	struct linux_ioctl_handler **lihp;
1076060e4882SDoug Ambrisko 	struct linux_device_handler **ldhp;
1077514058dcSAlexander Langer 
1078514058dcSAlexander Langer 	error = 0;
1079514058dcSAlexander Langer 
1080aa855a59SPeter Wemm 	switch(type) {
1081aa855a59SPeter Wemm 	case MOD_LOAD:
1082aa855a59SPeter Wemm 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1083aa855a59SPeter Wemm 		     ++brandinfo)
10843ebc1248SPeter Wemm 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1085aa855a59SPeter Wemm 				error = EINVAL;
1086466b14d7SMarcel Moolenaar 		if (error == 0) {
1087f41325dbSPeter Wemm 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1088f41325dbSPeter Wemm 				linux_ioctl_register_handler(*lihp);
1089060e4882SDoug Ambrisko 			SET_FOREACH(ldhp, linux_device_handler_set)
1090060e4882SDoug Ambrisko 				linux_device_register_handler(*ldhp);
1091357afa71SJung-uk Kim 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
10929b44bfc5SAlexander Leidinger 			sx_init(&emul_shared_lock, "emuldata->shared lock");
10939b44bfc5SAlexander Leidinger 			LIST_INIT(&futex_list);
109479262bf1SDmitry Chagin 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
10959b44bfc5SAlexander Leidinger 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
10969b44bfc5SAlexander Leidinger 			      NULL, 1000);
10979b44bfc5SAlexander Leidinger 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
10989b44bfc5SAlexander Leidinger 			      NULL, 1000);
10999b44bfc5SAlexander Leidinger 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
11009b44bfc5SAlexander Leidinger 			      NULL, 1000);
11014d7c2e8aSDmitry Chagin 			linux_get_machine(&linux_platform);
11024d7c2e8aSDmitry Chagin 			linux_szplatform = roundup(strlen(linux_platform) + 1,
11034d7c2e8aSDmitry Chagin 			    sizeof(char *));
11047ae27ff4SJamie Gritton 			linux_osd_jail_register();
11051ca16454SDmitry Chagin 			stclohz = (stathz ? stathz : hz);
110643bef515SMarcel Moolenaar 			if (bootverbose)
1107466b14d7SMarcel Moolenaar 				printf("Linux ELF exec handler installed\n");
1108466b14d7SMarcel Moolenaar 		} else
1109466b14d7SMarcel Moolenaar 			printf("cannot insert Linux ELF brand handler\n");
1110aa855a59SPeter Wemm 		break;
1111aa855a59SPeter Wemm 	case MOD_UNLOAD:
1112aa855a59SPeter Wemm 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1113aa855a59SPeter Wemm 		     ++brandinfo)
11143ebc1248SPeter Wemm 			if (elf32_brand_inuse(*brandinfo))
1115d2758342SMark Newton 				error = EBUSY;
1116d2758342SMark Newton 		if (error == 0) {
1117d2758342SMark Newton 			for (brandinfo = &linux_brandlist[0];
1118d2758342SMark Newton 			     *brandinfo != NULL; ++brandinfo)
11193ebc1248SPeter Wemm 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1120aa855a59SPeter Wemm 					error = EINVAL;
1121d2758342SMark Newton 		}
1122466b14d7SMarcel Moolenaar 		if (error == 0) {
1123f41325dbSPeter Wemm 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1124f41325dbSPeter Wemm 				linux_ioctl_unregister_handler(*lihp);
1125060e4882SDoug Ambrisko 			SET_FOREACH(ldhp, linux_device_handler_set)
1126060e4882SDoug Ambrisko 				linux_device_unregister_handler(*ldhp);
1127357afa71SJung-uk Kim 			mtx_destroy(&emul_lock);
11289b44bfc5SAlexander Leidinger 			sx_destroy(&emul_shared_lock);
112979262bf1SDmitry Chagin 			mtx_destroy(&futex_mtx);
11309b44bfc5SAlexander Leidinger 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
11319b44bfc5SAlexander Leidinger 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
11329b44bfc5SAlexander Leidinger 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
11337ae27ff4SJamie Gritton 			linux_osd_jail_deregister();
1134466b14d7SMarcel Moolenaar 			if (bootverbose)
1135466b14d7SMarcel Moolenaar 				printf("Linux ELF exec handler removed\n");
1136466b14d7SMarcel Moolenaar 		} else
1137aa855a59SPeter Wemm 			printf("Could not deinstall ELF interpreter entry\n");
1138aa855a59SPeter Wemm 		break;
1139aa855a59SPeter Wemm 	default:
11403e019deaSPoul-Henning Kamp 		return EOPNOTSUPP;
1141d30ea4f5SPeter Wemm 	}
1142aa855a59SPeter Wemm 	return error;
1143aa855a59SPeter Wemm }
1144466b14d7SMarcel Moolenaar 
1145aa855a59SPeter Wemm static moduledata_t linux_elf_mod = {
1146aa855a59SPeter Wemm 	"linuxelf",
1147aa855a59SPeter Wemm 	linux_elf_modevent,
1148aa855a59SPeter Wemm 	0
1149aa855a59SPeter Wemm };
1150466b14d7SMarcel Moolenaar 
1151aa855a59SPeter Wemm DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1152