xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 67d39748499e85cff626c202aa2cb6e9f180283e)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysctl.h>
48 #include <sys/sysent.h>
49 #include <sys/sysproto.h>
50 #include <sys/vnode.h>
51 #include <sys/eventhandler.h>
52 
53 #include <vm/vm.h>
54 #include <vm/pmap.h>
55 #include <vm/vm_extern.h>
56 #include <vm/vm_map.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_param.h>
60 
61 #include <machine/cpu.h>
62 #include <machine/cputypes.h>
63 #include <machine/md_var.h>
64 #include <machine/pcb.h>
65 
66 #include <i386/linux/linux.h>
67 #include <i386/linux/linux_proto.h>
68 #include <compat/linux/linux_emul.h>
69 #include <compat/linux/linux_futex.h>
70 #include <compat/linux/linux_ioctl.h>
71 #include <compat/linux/linux_mib.h>
72 #include <compat/linux/linux_misc.h>
73 #include <compat/linux/linux_signal.h>
74 #include <compat/linux/linux_util.h>
75 #include <compat/linux/linux_vdso.h>
76 
77 MODULE_VERSION(linux, 1);
78 
79 #if BYTE_ORDER == LITTLE_ENDIAN
80 #define SHELLMAGIC      0x2123 /* #! */
81 #else
82 #define SHELLMAGIC      0x2321
83 #endif
84 
85 #if defined(DEBUG)
86 SYSCTL_PROC(_compat_linux, OID_AUTO, debug,
87             CTLTYPE_STRING | CTLFLAG_RW,
88             0, 0, linux_sysctl_debug, "A",
89             "Linux debugging control");
90 #endif
91 
92 /*
93  * Allow the sendsig functions to use the ldebug() facility
94  * even though they are not syscalls themselves. Map them
95  * to syscall 0. This is slightly less bogus than using
96  * ldebug(sigreturn).
97  */
98 #define	LINUX_SYS_linux_rt_sendsig	0
99 #define	LINUX_SYS_linux_sendsig		0
100 
101 #define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))
102 
103 static int linux_szsigcode;
104 static vm_object_t linux_shared_page_obj;
105 static char *linux_shared_page_mapping;
106 extern char _binary_linux_locore_o_start;
107 extern char _binary_linux_locore_o_end;
108 
109 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
110 
111 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
112 
113 static int	linux_fixup(register_t **stack_base,
114 		    struct image_params *iparams);
115 static int	elf_linux_fixup(register_t **stack_base,
116 		    struct image_params *iparams);
117 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
118 static void	exec_linux_setregs(struct thread *td,
119 		    struct image_params *imgp, u_long stack);
120 static register_t *linux_copyout_strings(struct image_params *imgp);
121 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
122 static void	linux_vdso_install(void *param);
123 static void	linux_vdso_deinstall(void *param);
124 
125 static int linux_szplatform;
126 const char *linux_kplatform;
127 
128 static eventhandler_tag linux_exit_tag;
129 static eventhandler_tag linux_exec_tag;
130 static eventhandler_tag linux_thread_dtor_tag;
131 
132 /*
133  * Linux syscalls return negative errno's, we do positive and map them
134  * Reference:
135  *   FreeBSD: src/sys/sys/errno.h
136  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
137  *            linux-2.6.17.8/include/asm-generic/errno.h
138  */
139 static int bsd_to_linux_errno[ELAST + 1] = {
140 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
141 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
142 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
143 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
144 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
145 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
146 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
147 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
148 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
149 	 -72, -67, -71
150 };
151 
152 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
153 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
154 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
155 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
156 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
157 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
158 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
159 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
160 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
161 };
162 
163 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
164 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
165 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
166 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
167 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
168 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
169 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
170 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
171 	SIGIO, SIGURG, SIGSYS
172 };
173 
/* Value reported for any trap code that has no entry in the table below. */
#define LINUX_T_UNKNOWN  255

/*
 * Trap code translation table, indexed by the FreeBSD trap code; each
 * entry is the number reported to the Linux program.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0              */ LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT  */ 6,
	/*  2              */ LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT     */ 3,
	/*  4              */ LINUX_T_UNKNOWN,
	/*  5              */ LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP  */ 16,
	/*  7 T_ASTFLT     */ 254,
	/*  8              */ LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT    */ 13,
	/* 10 T_TRCTRAP    */ 1,
	/* 11              */ LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT    */ 14,
	/* 13              */ LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT   */ 17,
	/* 15              */ LINUX_T_UNKNOWN,
	/* 16              */ LINUX_T_UNKNOWN,
	/* 17              */ LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE     */ 0,
	/* 19 T_NMI        */ 2,
	/* 20 T_OFLOW      */ 4,
	/* 21 T_BOUND      */ 5,
	/* 22 T_DNA        */ 7,
	/* 23 T_DOUBLEFLT  */ 8,
	/* 24 T_FPOPFLT    */ 9,
	/* 25 T_TSSFLT     */ 10,
	/* 26 T_SEGNPFLT   */ 11,
	/* 27 T_STKFLT     */ 12,
	/* 28 T_MCHK       */ 18,
	/* 29 T_XMMFLT     */ 19,
	/* 30 T_RESERVED   */ 15
};

/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode[].  The index is
 * compared against an unsigned element count, so negative codes as well
 * as codes past the end of the table yield LINUX_T_UNKNOWN.
 */
#define bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
212 
/*
 * vDSO symbols used in this file: linux_sigcode and linux_rt_sigcode are
 * loaded into %eip by the sendsig routines, and linux_vsyscall is
 * exported through the LINUX_AT_SYSINFO aux vector entry.
 * NOTE(review): presumably LINUX_VDSO_SYM_INTPTR both declares the
 * variable and registers it for resolution at vDSO install time --
 * confirm in compat/linux/linux_vdso.h.
 */
LINUX_VDSO_SYM_INTPTR(linux_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_vsyscall);
216 
217 /*
218  * If FreeBSD & Linux have a difference of opinion about what a trap
219  * means, deal with it here.
220  *
221  * MPSAFE
222  */
223 static int
224 translate_traps(int signal, int trap_code)
225 {
226 	if (signal != SIGBUS)
227 		return (signal);
228 	switch (trap_code) {
229 	case T_PROTFLT:
230 	case T_TSSFLT:
231 	case T_DOUBLEFLT:
232 	case T_PAGEFLT:
233 		return (SIGSEGV);
234 	default:
235 		return (signal);
236 	}
237 }
238 
239 static int
240 linux_fixup(register_t **stack_base, struct image_params *imgp)
241 {
242 	register_t *argv, *envp;
243 
244 	argv = *stack_base;
245 	envp = *stack_base + (imgp->args->argc + 1);
246 	(*stack_base)--;
247 	suword(*stack_base, (intptr_t)(void *)envp);
248 	(*stack_base)--;
249 	suword(*stack_base, (intptr_t)(void *)argv);
250 	(*stack_base)--;
251 	suword(*stack_base, imgp->args->argc);
252 	return (0);
253 }
254 
255 static int
256 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
257 {
258 	struct proc *p;
259 	Elf32_Auxargs *args;
260 	Elf32_Addr *uplatform;
261 	struct ps_strings *arginfo;
262 	register_t *pos;
263 
264 	KASSERT(curthread->td_proc == imgp->proc,
265 	    ("unsafe elf_linux_fixup(), should be curproc"));
266 
267 	p = imgp->proc;
268 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
269 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
270 	args = (Elf32_Auxargs *)imgp->auxargs;
271 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
272 
273 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
274 	    imgp->proc->p_sysent->sv_shared_page_base);
275 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall);
276 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
277 
278 	/*
279 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
280 	 * as it has appeared in the 2.4.0-rc7 first time.
281 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
282 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
283 	 * is not present.
284 	 * Also see linux_times() implementation.
285 	 */
286 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
287 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
288 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
289 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
290 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
291 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
292 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
293 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
294 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
295 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
296 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
297 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
298 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
299 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
300 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
301 	if (args->execfd != -1)
302 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
303 	AUXARGS_ENTRY(pos, AT_NULL, 0);
304 
305 	free(imgp->auxargs, M_TEMP);
306 	imgp->auxargs = NULL;
307 
308 	(*stack_base)--;
309 	suword(*stack_base, (register_t)imgp->args->argc);
310 	return (0);
311 }
312 
313 /*
314  * Copied from kern/kern_exec.c
315  */
316 static register_t *
317 linux_copyout_strings(struct image_params *imgp)
318 {
319 	int argc, envc;
320 	char **vectp;
321 	char *stringp, *destp;
322 	register_t *stack_base;
323 	struct ps_strings *arginfo;
324 	struct proc *p;
325 
326 	/*
327 	 * Calculate string base and vector table pointers.
328 	 */
329 	p = imgp->proc;
330 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
331 	destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
332 	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
333 
334 	/*
335 	 * install LINUX_PLATFORM
336 	 */
337 	copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform),
338 	    linux_szplatform);
339 
340 	/*
341 	 * If we have a valid auxargs ptr, prepare some room
342 	 * on the stack.
343 	 */
344 	if (imgp->auxargs) {
345 		/*
346 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
347 		 * lower compatibility.
348 		 */
349 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
350 		    (LINUX_AT_COUNT * 2);
351 		/*
352 		 * The '+ 2' is for the null pointers at the end of each of
353 		 * the arg and env vector sets,and imgp->auxarg_size is room
354 		 * for argument of Runtime loader.
355 		 */
356 		vectp = (char **)(destp - (imgp->args->argc +
357 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
358 	} else {
359 		/*
360 		 * The '+ 2' is for the null pointers at the end of each of
361 		 * the arg and env vector sets
362 		 */
363 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
364 		    sizeof(char *));
365 	}
366 
367 	/*
368 	 * vectp also becomes our initial stack base
369 	 */
370 	stack_base = (register_t *)vectp;
371 
372 	stringp = imgp->args->begin_argv;
373 	argc = imgp->args->argc;
374 	envc = imgp->args->envc;
375 
376 	/*
377 	 * Copy out strings - arguments and environment.
378 	 */
379 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
380 
381 	/*
382 	 * Fill in "ps_strings" struct for ps, w, etc.
383 	 */
384 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
385 	suword(&arginfo->ps_nargvstr, argc);
386 
387 	/*
388 	 * Fill in argument portion of vector table.
389 	 */
390 	for (; argc > 0; --argc) {
391 		suword(vectp++, (long)(intptr_t)destp);
392 		while (*stringp++ != 0)
393 			destp++;
394 		destp++;
395 	}
396 
397 	/* a null vector table pointer separates the argp's from the envp's */
398 	suword(vectp++, 0);
399 
400 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
401 	suword(&arginfo->ps_nenvstr, envc);
402 
403 	/*
404 	 * Fill in environment portion of vector table.
405 	 */
406 	for (; envc > 0; --envc) {
407 		suword(vectp++, (long)(intptr_t)destp);
408 		while (*stringp++ != 0)
409 			destp++;
410 		destp++;
411 	}
412 
413 	/* end of vector table is a null pointer */
414 	suword(vectp, 0);
415 
416 	return (stack_base);
417 }
418 
419 static void
420 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
421 {
422 	struct thread *td = curthread;
423 	struct proc *p = td->td_proc;
424 	struct sigacts *psp;
425 	struct trapframe *regs;
426 	struct l_rt_sigframe *fp, frame;
427 	int sig, code;
428 	int oonstack;
429 
430 	sig = ksi->ksi_signo;
431 	code = ksi->ksi_code;
432 	PROC_LOCK_ASSERT(p, MA_OWNED);
433 	psp = p->p_sigacts;
434 	mtx_assert(&psp->ps_mtx, MA_OWNED);
435 	regs = td->td_frame;
436 	oonstack = sigonstack(regs->tf_esp);
437 
438 #ifdef DEBUG
439 	if (ldebug(rt_sendsig))
440 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
441 		    catcher, sig, (void*)mask, code);
442 #endif
443 	/*
444 	 * Allocate space for the signal handler context.
445 	 */
446 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
447 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
448 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
449 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
450 	} else
451 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
452 	mtx_unlock(&psp->ps_mtx);
453 
454 	/*
455 	 * Build the argument list for the signal handler.
456 	 */
457 	if (p->p_sysent->sv_sigtbl)
458 		if (sig <= p->p_sysent->sv_sigsize)
459 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
460 
461 	bzero(&frame, sizeof(frame));
462 
463 	frame.sf_handler = catcher;
464 	frame.sf_sig = sig;
465 	frame.sf_siginfo = &fp->sf_si;
466 	frame.sf_ucontext = &fp->sf_sc;
467 
468 	/* Fill in POSIX parts */
469 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
470 
471 	/*
472 	 * Build the signal context to be used by sigreturn.
473 	 */
474 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
475 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
476 
477 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
478 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
479 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
480 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
481 	PROC_UNLOCK(p);
482 
483 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
484 
485 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
486 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
487 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
488 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
489 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
490 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
491 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
492 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
493 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
494 	frame.sf_sc.uc_mcontext.sc_esp    = regs->tf_esp;
495 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
496 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
497 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
498 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
499 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
500 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
501 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
502 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
503 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
504 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
505 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
506 
507 #ifdef DEBUG
508 	if (ldebug(rt_sendsig))
509 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
510 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
511 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
512 #endif
513 
514 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
515 		/*
516 		 * Process has trashed its stack; give it an illegal
517 		 * instruction to halt it in its tracks.
518 		 */
519 #ifdef DEBUG
520 		if (ldebug(rt_sendsig))
521 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
522 			    fp, oonstack);
523 #endif
524 		PROC_LOCK(p);
525 		sigexit(td, SIGILL);
526 	}
527 
528 	/*
529 	 * Build context to run handler in.
530 	 */
531 	regs->tf_esp = (int)fp;
532 	regs->tf_eip = linux_rt_sigcode;
533 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
534 	regs->tf_cs = _ucodesel;
535 	regs->tf_ds = _udatasel;
536 	regs->tf_es = _udatasel;
537 	regs->tf_fs = _udatasel;
538 	regs->tf_ss = _udatasel;
539 	PROC_LOCK(p);
540 	mtx_lock(&psp->ps_mtx);
541 }
542 
543 
544 /*
545  * Send an interrupt to process.
546  *
547  * Stack is set up to allow sigcode stored
548  * in u. to call routine, followed by kcall
549  * to sigreturn routine below.  After sigreturn
550  * resets the signal mask, the stack, and the
551  * frame pointer, it returns to the user
552  * specified pc, psl.
553  */
554 static void
555 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
556 {
557 	struct thread *td = curthread;
558 	struct proc *p = td->td_proc;
559 	struct sigacts *psp;
560 	struct trapframe *regs;
561 	struct l_sigframe *fp, frame;
562 	l_sigset_t lmask;
563 	int sig, code;
564 	int oonstack, i;
565 
566 	PROC_LOCK_ASSERT(p, MA_OWNED);
567 	psp = p->p_sigacts;
568 	sig = ksi->ksi_signo;
569 	code = ksi->ksi_code;
570 	mtx_assert(&psp->ps_mtx, MA_OWNED);
571 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
572 		/* Signal handler installed with SA_SIGINFO. */
573 		linux_rt_sendsig(catcher, ksi, mask);
574 		return;
575 	}
576 	regs = td->td_frame;
577 	oonstack = sigonstack(regs->tf_esp);
578 
579 #ifdef DEBUG
580 	if (ldebug(sendsig))
581 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
582 		    catcher, sig, (void*)mask, code);
583 #endif
584 
585 	/*
586 	 * Allocate space for the signal handler context.
587 	 */
588 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
589 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
590 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
591 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
592 	} else
593 		fp = (struct l_sigframe *)regs->tf_esp - 1;
594 	mtx_unlock(&psp->ps_mtx);
595 	PROC_UNLOCK(p);
596 
597 	/*
598 	 * Build the argument list for the signal handler.
599 	 */
600 	if (p->p_sysent->sv_sigtbl)
601 		if (sig <= p->p_sysent->sv_sigsize)
602 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
603 
604 	bzero(&frame, sizeof(frame));
605 
606 	frame.sf_handler = catcher;
607 	frame.sf_sig = sig;
608 
609 	bsd_to_linux_sigset(mask, &lmask);
610 
611 	/*
612 	 * Build the signal context to be used by sigreturn.
613 	 */
614 	frame.sf_sc.sc_mask   = lmask.__bits[0];
615 	frame.sf_sc.sc_gs     = rgs();
616 	frame.sf_sc.sc_fs     = regs->tf_fs;
617 	frame.sf_sc.sc_es     = regs->tf_es;
618 	frame.sf_sc.sc_ds     = regs->tf_ds;
619 	frame.sf_sc.sc_edi    = regs->tf_edi;
620 	frame.sf_sc.sc_esi    = regs->tf_esi;
621 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
622 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
623 	frame.sf_sc.sc_esp    = regs->tf_esp;
624 	frame.sf_sc.sc_edx    = regs->tf_edx;
625 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
626 	frame.sf_sc.sc_eax    = regs->tf_eax;
627 	frame.sf_sc.sc_eip    = regs->tf_eip;
628 	frame.sf_sc.sc_cs     = regs->tf_cs;
629 	frame.sf_sc.sc_eflags = regs->tf_eflags;
630 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
631 	frame.sf_sc.sc_ss     = regs->tf_ss;
632 	frame.sf_sc.sc_err    = regs->tf_err;
633 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
634 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
635 
636 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
637 		frame.sf_extramask[i] = lmask.__bits[i+1];
638 
639 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
640 		/*
641 		 * Process has trashed its stack; give it an illegal
642 		 * instruction to halt it in its tracks.
643 		 */
644 		PROC_LOCK(p);
645 		sigexit(td, SIGILL);
646 	}
647 
648 	/*
649 	 * Build context to run handler in.
650 	 */
651 	regs->tf_esp = (int)fp;
652 	regs->tf_eip = linux_sigcode;
653 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
654 	regs->tf_cs = _ucodesel;
655 	regs->tf_ds = _udatasel;
656 	regs->tf_es = _udatasel;
657 	regs->tf_fs = _udatasel;
658 	regs->tf_ss = _udatasel;
659 	PROC_LOCK(p);
660 	mtx_lock(&psp->ps_mtx);
661 }
662 
663 /*
664  * System call to cleanup state after a signal
665  * has been taken.  Reset signal mask and
666  * stack state from context left by sendsig (above).
667  * Return to previous pc and psl as specified by
668  * context left by sendsig. Check carefully to
669  * make sure that the user has not modified the
670  * psl to gain improper privileges or to cause
671  * a machine fault.
672  */
673 int
674 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
675 {
676 	struct l_sigframe frame;
677 	struct trapframe *regs;
678 	l_sigset_t lmask;
679 	sigset_t bmask;
680 	int eflags, i;
681 	ksiginfo_t ksi;
682 
683 	regs = td->td_frame;
684 
685 #ifdef DEBUG
686 	if (ldebug(sigreturn))
687 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
688 #endif
689 	/*
690 	 * The trampoline code hands us the sigframe.
691 	 * It is unsafe to keep track of it ourselves, in the event that a
692 	 * program jumps out of a signal handler.
693 	 */
694 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
695 		return (EFAULT);
696 
697 	/*
698 	 * Check for security violations.
699 	 */
700 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
701 	eflags = frame.sf_sc.sc_eflags;
702 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
703 		return (EINVAL);
704 
705 	/*
706 	 * Don't allow users to load a valid privileged %cs.  Let the
707 	 * hardware check for invalid selectors, excess privilege in
708 	 * other selectors, invalid %eip's and invalid %esp's.
709 	 */
710 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
711 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
712 		ksiginfo_init_trap(&ksi);
713 		ksi.ksi_signo = SIGBUS;
714 		ksi.ksi_code = BUS_OBJERR;
715 		ksi.ksi_trapno = T_PROTFLT;
716 		ksi.ksi_addr = (void *)regs->tf_eip;
717 		trapsignal(td, &ksi);
718 		return (EINVAL);
719 	}
720 
721 	lmask.__bits[0] = frame.sf_sc.sc_mask;
722 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
723 		lmask.__bits[i+1] = frame.sf_extramask[i];
724 	linux_to_bsd_sigset(&lmask, &bmask);
725 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
726 
727 	/*
728 	 * Restore signal context.
729 	 */
730 	/* %gs was restored by the trampoline. */
731 	regs->tf_fs     = frame.sf_sc.sc_fs;
732 	regs->tf_es     = frame.sf_sc.sc_es;
733 	regs->tf_ds     = frame.sf_sc.sc_ds;
734 	regs->tf_edi    = frame.sf_sc.sc_edi;
735 	regs->tf_esi    = frame.sf_sc.sc_esi;
736 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
737 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
738 	regs->tf_edx    = frame.sf_sc.sc_edx;
739 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
740 	regs->tf_eax    = frame.sf_sc.sc_eax;
741 	regs->tf_eip    = frame.sf_sc.sc_eip;
742 	regs->tf_cs     = frame.sf_sc.sc_cs;
743 	regs->tf_eflags = eflags;
744 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
745 	regs->tf_ss     = frame.sf_sc.sc_ss;
746 
747 	return (EJUSTRETURN);
748 }
749 
750 /*
751  * System call to cleanup state after a signal
752  * has been taken.  Reset signal mask and
753  * stack state from context left by rt_sendsig (above).
754  * Return to previous pc and psl as specified by
755  * context left by sendsig. Check carefully to
756  * make sure that the user has not modified the
757  * psl to gain improper privileges or to cause
758  * a machine fault.
759  */
760 int
761 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
762 {
763 	struct l_ucontext uc;
764 	struct l_sigcontext *context;
765 	sigset_t bmask;
766 	l_stack_t *lss;
767 	stack_t ss;
768 	struct trapframe *regs;
769 	int eflags;
770 	ksiginfo_t ksi;
771 
772 	regs = td->td_frame;
773 
774 #ifdef DEBUG
775 	if (ldebug(rt_sigreturn))
776 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
777 #endif
778 	/*
779 	 * The trampoline code hands us the ucontext.
780 	 * It is unsafe to keep track of it ourselves, in the event that a
781 	 * program jumps out of a signal handler.
782 	 */
783 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
784 		return (EFAULT);
785 
786 	context = &uc.uc_mcontext;
787 
788 	/*
789 	 * Check for security violations.
790 	 */
791 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
792 	eflags = context->sc_eflags;
793 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
794 		return (EINVAL);
795 
796 	/*
797 	 * Don't allow users to load a valid privileged %cs.  Let the
798 	 * hardware check for invalid selectors, excess privilege in
799 	 * other selectors, invalid %eip's and invalid %esp's.
800 	 */
801 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
802 	if (!CS_SECURE(context->sc_cs)) {
803 		ksiginfo_init_trap(&ksi);
804 		ksi.ksi_signo = SIGBUS;
805 		ksi.ksi_code = BUS_OBJERR;
806 		ksi.ksi_trapno = T_PROTFLT;
807 		ksi.ksi_addr = (void *)regs->tf_eip;
808 		trapsignal(td, &ksi);
809 		return (EINVAL);
810 	}
811 
812 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
813 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
814 
815 	/*
816 	 * Restore signal context
817 	 */
818 	/* %gs was restored by the trampoline. */
819 	regs->tf_fs     = context->sc_fs;
820 	regs->tf_es     = context->sc_es;
821 	regs->tf_ds     = context->sc_ds;
822 	regs->tf_edi    = context->sc_edi;
823 	regs->tf_esi    = context->sc_esi;
824 	regs->tf_ebp    = context->sc_ebp;
825 	regs->tf_ebx    = context->sc_ebx;
826 	regs->tf_edx    = context->sc_edx;
827 	regs->tf_ecx    = context->sc_ecx;
828 	regs->tf_eax    = context->sc_eax;
829 	regs->tf_eip    = context->sc_eip;
830 	regs->tf_cs     = context->sc_cs;
831 	regs->tf_eflags = eflags;
832 	regs->tf_esp    = context->sc_esp_at_signal;
833 	regs->tf_ss     = context->sc_ss;
834 
835 	/*
836 	 * call sigaltstack & ignore results..
837 	 */
838 	lss = &uc.uc_stack;
839 	ss.ss_sp = lss->ss_sp;
840 	ss.ss_size = lss->ss_size;
841 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
842 
843 #ifdef DEBUG
844 	if (ldebug(rt_sigreturn))
845 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
846 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
847 #endif
848 	(void)kern_sigaltstack(td, &ss, NULL);
849 
850 	return (EJUSTRETURN);
851 }
852 
853 static int
854 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
855 {
856 	struct proc *p;
857 	struct trapframe *frame;
858 
859 	p = td->td_proc;
860 	frame = td->td_frame;
861 
862 	sa->code = frame->tf_eax;
863 	sa->args[0] = frame->tf_ebx;
864 	sa->args[1] = frame->tf_ecx;
865 	sa->args[2] = frame->tf_edx;
866 	sa->args[3] = frame->tf_esi;
867 	sa->args[4] = frame->tf_edi;
868 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
869 
870 	if (sa->code >= p->p_sysent->sv_size)
871 		sa->callp = &p->p_sysent->sv_table[0];
872  	else
873  		sa->callp = &p->p_sysent->sv_table[sa->code];
874 	sa->narg = sa->callp->sy_narg;
875 
876 	td->td_retval[0] = 0;
877 	td->td_retval[1] = frame->tf_edx;
878 
879 	return (0);
880 }
881 
882 /*
883  * If a linux binary is exec'ing something, try this image activator
884  * first.  We override standard shell script execution in order to
885  * be able to modify the interpreter path.  We only do this if a linux
886  * binary is doing the exec, so we do not create an EXEC module for it.
887  */
888 static int	exec_linux_imgact_try(struct image_params *iparams);
889 
890 static int
891 exec_linux_imgact_try(struct image_params *imgp)
892 {
893     const char *head = (const char *)imgp->image_header;
894     char *rpath;
895     int error = -1;
896 
897     /*
898      * The interpreter for shell scripts run from a linux binary needs
899      * to be located in /compat/linux if possible in order to recursively
900      * maintain linux path emulation.
901      */
902     if (((const short *)head)[0] == SHELLMAGIC) {
903 	    /*
904 	     * Run our normal shell image activator.  If it succeeds attempt
905 	     * to use the alternate path for the interpreter.  If an alternate
906 	     * path is found, use our stringspace to store it.
907 	     */
908 	    if ((error = exec_shell_imgact(imgp)) == 0) {
909 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
910 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
911 		    if (rpath != NULL)
912 			    imgp->args->fname_buf =
913 				imgp->interpreter_name = rpath;
914 	    }
915     }
916     return (error);
917 }
918 
919 /*
920  * exec_setregs may initialize some registers differently than Linux
921  * does, thus potentially confusing Linux binaries. If necessary, we
922  * override the exec_setregs default(s) here.
923  */
924 static void
925 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
926 {
927 	struct pcb *pcb = td->td_pcb;
928 
929 	exec_setregs(td, imgp, stack);
930 
931 	/* Linux sets %gs to 0, we default to _udatasel */
932 	pcb->pcb_gs = 0;
933 	load_gs(0);
934 
935 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
936 }
937 
938 static void
939 linux_get_machine(const char **dst)
940 {
941 
942 	switch (cpu_class) {
943 	case CPUCLASS_686:
944 		*dst = "i686";
945 		break;
946 	case CPUCLASS_586:
947 		*dst = "i586";
948 		break;
949 	case CPUCLASS_486:
950 		*dst = "i486";
951 		break;
952 	default:
953 		*dst = "i386";
954 	}
955 }
956 
957 struct sysentvec linux_sysvec = {
958 	.sv_size	= LINUX_SYS_MAXSYSCALL,
959 	.sv_table	= linux_sysent,
960 	.sv_mask	= 0,
961 	.sv_sigsize	= LINUX_SIGTBLSZ,
962 	.sv_sigtbl	= bsd_to_linux_signal,
963 	.sv_errsize	= ELAST + 1,
964 	.sv_errtbl	= bsd_to_linux_errno,
965 	.sv_transtrap	= translate_traps,
966 	.sv_fixup	= linux_fixup,
967 	.sv_sendsig	= linux_sendsig,
968 	.sv_sigcode	= &_binary_linux_locore_o_start,
969 	.sv_szsigcode	= &linux_szsigcode,
970 	.sv_prepsyscall	= NULL,
971 	.sv_name	= "Linux a.out",
972 	.sv_coredump	= NULL,
973 	.sv_imgact_try	= exec_linux_imgact_try,
974 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
975 	.sv_pagesize	= PAGE_SIZE,
976 	.sv_minuser	= VM_MIN_ADDRESS,
977 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
978 	.sv_usrstack	= LINUX_USRSTACK,
979 	.sv_psstrings	= PS_STRINGS,
980 	.sv_stackprot	= VM_PROT_ALL,
981 	.sv_copyout_strings = exec_copyout_strings,
982 	.sv_setregs	= exec_linux_setregs,
983 	.sv_fixlimit	= NULL,
984 	.sv_maxssiz	= NULL,
985 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
986 	.sv_set_syscall_retval = cpu_set_syscall_retval,
987 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
988 	.sv_syscallnames = NULL,
989 	.sv_shared_page_base = LINUX_SHAREDPAGE,
990 	.sv_shared_page_len = PAGE_SIZE,
991 	.sv_schedtail	= linux_schedtail,
992 	.sv_thread_detach = linux_thread_detach,
993 };
994 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
995 
996 struct sysentvec elf_linux_sysvec = {
997 	.sv_size	= LINUX_SYS_MAXSYSCALL,
998 	.sv_table	= linux_sysent,
999 	.sv_mask	= 0,
1000 	.sv_sigsize	= LINUX_SIGTBLSZ,
1001 	.sv_sigtbl	= bsd_to_linux_signal,
1002 	.sv_errsize	= ELAST + 1,
1003 	.sv_errtbl	= bsd_to_linux_errno,
1004 	.sv_transtrap	= translate_traps,
1005 	.sv_fixup	= elf_linux_fixup,
1006 	.sv_sendsig	= linux_sendsig,
1007 	.sv_sigcode	= &_binary_linux_locore_o_start,
1008 	.sv_szsigcode	= &linux_szsigcode,
1009 	.sv_prepsyscall	= NULL,
1010 	.sv_name	= "Linux ELF",
1011 	.sv_coredump	= elf32_coredump,
1012 	.sv_imgact_try	= exec_linux_imgact_try,
1013 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1014 	.sv_pagesize	= PAGE_SIZE,
1015 	.sv_minuser	= VM_MIN_ADDRESS,
1016 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1017 	.sv_usrstack	= LINUX_USRSTACK,
1018 	.sv_psstrings	= LINUX_PS_STRINGS,
1019 	.sv_stackprot	= VM_PROT_ALL,
1020 	.sv_copyout_strings = linux_copyout_strings,
1021 	.sv_setregs	= exec_linux_setregs,
1022 	.sv_fixlimit	= NULL,
1023 	.sv_maxssiz	= NULL,
1024 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1025 	.sv_set_syscall_retval = cpu_set_syscall_retval,
1026 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1027 	.sv_syscallnames = NULL,
1028 	.sv_shared_page_base = LINUX_SHAREDPAGE,
1029 	.sv_shared_page_len = PAGE_SIZE,
1030 	.sv_schedtail	= linux_schedtail,
1031 	.sv_thread_detach = linux_thread_detach,
1032 };
1033 
1034 static void
1035 linux_vdso_install(void *param)
1036 {
1037 
1038 	linux_szsigcode = (&_binary_linux_locore_o_end -
1039 	    &_binary_linux_locore_o_start);
1040 
1041 	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
1042 		panic("Linux invalid vdso size\n");
1043 
1044 	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
1045 
1046 	linux_shared_page_obj = __elfN(linux_shared_page_init)
1047 	    (&linux_shared_page_mapping);
1048 
1049 	__elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX_SHAREDPAGE);
1050 
1051 	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1052 	    linux_szsigcode);
1053 	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1054 }
1055 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1056     (sysinit_cfunc_t)linux_vdso_install, NULL);
1057 
1058 static void
1059 linux_vdso_deinstall(void *param)
1060 {
1061 
1062 	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
1063 };
1064 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1065     (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1066 
/*
 * Values matched against the ABI note of a Linux ELF image: the note
 * vendor name (its size, including the terminating NUL, is used as the
 * expected n_namesz) and the expected first descriptor word.
 * Neither is ever modified, so both are const.
 */
static const char GNU_ABI_VENDOR[] = "GNU";
static const int GNULINUX_ABI_DESC = 0;
1069 
1070 static boolean_t
1071 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1072 {
1073 	const Elf32_Word *desc;
1074 	uintptr_t p;
1075 
1076 	p = (uintptr_t)(note + 1);
1077 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1078 
1079 	desc = (const Elf32_Word *)p;
1080 	if (desc[0] != GNULINUX_ABI_DESC)
1081 		return (FALSE);
1082 
1083 	/*
1084 	 * For linux we encode osrel as follows (see linux_mib.c):
1085 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1086 	 */
1087 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1088 
1089 	return (TRUE);
1090 }
1091 
1092 static Elf_Brandnote linux_brandnote = {
1093 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1094 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1095 	.hdr.n_type	= 1,
1096 	.vendor		= GNU_ABI_VENDOR,
1097 	.flags		= BN_TRANSLATE_OSREL,
1098 	.trans_osrel	= linux_trans_osrel
1099 };
1100 
1101 static Elf32_Brandinfo linux_brand = {
1102 	.brand		= ELFOSABI_LINUX,
1103 	.machine	= EM_386,
1104 	.compat_3_brand	= "Linux",
1105 	.emul_path	= "/compat/linux",
1106 	.interp_path	= "/lib/ld-linux.so.1",
1107 	.sysvec		= &elf_linux_sysvec,
1108 	.interp_newpath	= NULL,
1109 	.brand_note	= &linux_brandnote,
1110 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1111 };
1112 
1113 static Elf32_Brandinfo linux_glibc2brand = {
1114 	.brand		= ELFOSABI_LINUX,
1115 	.machine	= EM_386,
1116 	.compat_3_brand	= "Linux",
1117 	.emul_path	= "/compat/linux",
1118 	.interp_path	= "/lib/ld-linux.so.2",
1119 	.sysvec		= &elf_linux_sysvec,
1120 	.interp_newpath	= NULL,
1121 	.brand_note	= &linux_brandnote,
1122 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1123 };
1124 
1125 Elf32_Brandinfo *linux_brandlist[] = {
1126 	&linux_brand,
1127 	&linux_glibc2brand,
1128 	NULL
1129 };
1130 
1131 static int
1132 linux_elf_modevent(module_t mod, int type, void *data)
1133 {
1134 	Elf32_Brandinfo **brandinfo;
1135 	int error;
1136 	struct linux_ioctl_handler **lihp;
1137 
1138 	error = 0;
1139 
1140 	switch(type) {
1141 	case MOD_LOAD:
1142 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1143 		     ++brandinfo)
1144 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1145 				error = EINVAL;
1146 		if (error == 0) {
1147 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1148 				linux_ioctl_register_handler(*lihp);
1149 			LIST_INIT(&futex_list);
1150 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1151 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1152 			      NULL, 1000);
1153 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1154 			      NULL, 1000);
1155 			linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1156 			    linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1157 			linux_get_machine(&linux_kplatform);
1158 			linux_szplatform = roundup(strlen(linux_kplatform) + 1,
1159 			    sizeof(char *));
1160 			linux_osd_jail_register();
1161 			stclohz = (stathz ? stathz : hz);
1162 			if (bootverbose)
1163 				printf("Linux ELF exec handler installed\n");
1164 		} else
1165 			printf("cannot insert Linux ELF brand handler\n");
1166 		break;
1167 	case MOD_UNLOAD:
1168 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1169 		     ++brandinfo)
1170 			if (elf32_brand_inuse(*brandinfo))
1171 				error = EBUSY;
1172 		if (error == 0) {
1173 			for (brandinfo = &linux_brandlist[0];
1174 			     *brandinfo != NULL; ++brandinfo)
1175 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1176 					error = EINVAL;
1177 		}
1178 		if (error == 0) {
1179 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1180 				linux_ioctl_unregister_handler(*lihp);
1181 			mtx_destroy(&futex_mtx);
1182 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1183 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1184 			EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1185 			linux_osd_jail_deregister();
1186 			if (bootverbose)
1187 				printf("Linux ELF exec handler removed\n");
1188 		} else
1189 			printf("Could not deinstall ELF interpreter entry\n");
1190 		break;
1191 	default:
1192 		return (EOPNOTSUPP);
1193 	}
1194 	return (error);
1195 }
1196 
1197 static moduledata_t linux_elf_mod = {
1198 	"linuxelf",
1199 	linux_elf_modevent,
1200 	0
1201 };
1202 
1203 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1204