xref: /freebsd/sys/i386/linux/linux_machdep.c (revision 262e143bd46171a6415a5b28af260a5efa2a3db8)
1 /*-
2  * Copyright (c) 2000 Marcel Moolenaar
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/imgact.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mman.h>
38 #include <sys/mutex.h>
39 #include <sys/proc.h>
40 #include <sys/resource.h>
41 #include <sys/resourcevar.h>
42 #include <sys/signalvar.h>
43 #include <sys/syscallsubr.h>
44 #include <sys/sysproto.h>
45 #include <sys/unistd.h>
46 
47 #include <machine/frame.h>
48 #include <machine/psl.h>
49 #include <machine/segments.h>
50 #include <machine/sysarch.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_map.h>
55 
56 #include <i386/linux/linux.h>
57 #include <i386/linux/linux_proto.h>
58 #include <compat/linux/linux_ipc.h>
59 #include <compat/linux/linux_signal.h>
60 #include <compat/linux/linux_util.h>
61 
/*
 * LDT entry description as passed in by a Linux process.
 * linux_modify_ldt() copies one of these in from user space for the
 * write_ldt functions and converts it to a native segment descriptor,
 * so the field order and widths must not be changed.
 */
struct l_descriptor {
	l_uint		entry_number;		/* LDT slot to modify */
	l_ulong		base_addr;		/* segment base address */
	l_uint		limit;			/* segment limit (low 20 bits used) */
	l_uint		seg_32bit:1;		/* copied to sd_def32 */
	l_uint		contents:2;		/* shifted into sd_type */
	l_uint		read_exec_only:1;	/* inverted into sd_type bit 1 */
	l_uint		limit_in_pages:1;	/* copied to sd_gran */
	l_uint		seg_not_present:1;	/* inverted into sd_p */
	l_uint		useable:1;		/* not consulted by linux_modify_ldt() */
};
73 
/*
 * Argument block for the old-style Linux select call.  linux_old_select()
 * copies this in from user space and forwards each member unchanged to
 * linux_select(), so the layout must not be changed.
 */
struct l_old_select_argv {
	l_int		nfds;
	l_fd_set	*readfds;
	l_fd_set	*writefds;
	l_fd_set	*exceptfds;
	struct l_timeval	*timeout;
};
81 
82 int
83 linux_to_bsd_sigaltstack(int lsa)
84 {
85 	int bsa = 0;
86 
87 	if (lsa & LINUX_SS_DISABLE)
88 		bsa |= SS_DISABLE;
89 	if (lsa & LINUX_SS_ONSTACK)
90 		bsa |= SS_ONSTACK;
91 	return (bsa);
92 }
93 
94 int
95 bsd_to_linux_sigaltstack(int bsa)
96 {
97 	int lsa = 0;
98 
99 	if (bsa & SS_DISABLE)
100 		lsa |= LINUX_SS_DISABLE;
101 	if (bsa & SS_ONSTACK)
102 		lsa |= LINUX_SS_ONSTACK;
103 	return (lsa);
104 }
105 
106 int
107 linux_execve(struct thread *td, struct linux_execve_args *args)
108 {
109 	int error;
110 	char *newpath;
111 	struct image_args eargs;
112 
113 	LCONVPATHEXIST(td, args->path, &newpath);
114 
115 #ifdef DEBUG
116 	if (ldebug(execve))
117 		printf(ARGS(execve, "%s"), newpath);
118 #endif
119 
120 	error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE,
121 	    args->argp, args->envp);
122 	free(newpath, M_TEMP);
123 	if (error == 0)
124 		error = kern_execve(td, &eargs, NULL);
125 	exec_free_args(&eargs);
126 	return (error);
127 }
128 
/*
 * Indirect argument pair used by the MSGRCV case of linux_ipc() when
 * the upper 16 bits of the "what" argument are zero: the message buffer
 * pointer and the message type are then copied in from this structure
 * instead of being taken from the system call arguments directly.
 */
struct l_ipc_kludge {
	struct l_msgbuf *msgp;
	l_long msgtyp;
};
133 
134 int
135 linux_ipc(struct thread *td, struct linux_ipc_args *args)
136 {
137 
138 	switch (args->what & 0xFFFF) {
139 	case LINUX_SEMOP: {
140 		struct linux_semop_args a;
141 
142 		a.semid = args->arg1;
143 		a.tsops = args->ptr;
144 		a.nsops = args->arg2;
145 		return (linux_semop(td, &a));
146 	}
147 	case LINUX_SEMGET: {
148 		struct linux_semget_args a;
149 
150 		a.key = args->arg1;
151 		a.nsems = args->arg2;
152 		a.semflg = args->arg3;
153 		return (linux_semget(td, &a));
154 	}
155 	case LINUX_SEMCTL: {
156 		struct linux_semctl_args a;
157 		int error;
158 
159 		a.semid = args->arg1;
160 		a.semnum = args->arg2;
161 		a.cmd = args->arg3;
162 		error = copyin(args->ptr, &a.arg, sizeof(a.arg));
163 		if (error)
164 			return (error);
165 		return (linux_semctl(td, &a));
166 	}
167 	case LINUX_MSGSND: {
168 		struct linux_msgsnd_args a;
169 
170 		a.msqid = args->arg1;
171 		a.msgp = args->ptr;
172 		a.msgsz = args->arg2;
173 		a.msgflg = args->arg3;
174 		return (linux_msgsnd(td, &a));
175 	}
176 	case LINUX_MSGRCV: {
177 		struct linux_msgrcv_args a;
178 
179 		a.msqid = args->arg1;
180 		a.msgsz = args->arg2;
181 		a.msgflg = args->arg3;
182 		if ((args->what >> 16) == 0) {
183 			struct l_ipc_kludge tmp;
184 			int error;
185 
186 			if (args->ptr == NULL)
187 				return (EINVAL);
188 			error = copyin(args->ptr, &tmp, sizeof(tmp));
189 			if (error)
190 				return (error);
191 			a.msgp = tmp.msgp;
192 			a.msgtyp = tmp.msgtyp;
193 		} else {
194 			a.msgp = args->ptr;
195 			a.msgtyp = args->arg5;
196 		}
197 		return (linux_msgrcv(td, &a));
198 	}
199 	case LINUX_MSGGET: {
200 		struct linux_msgget_args a;
201 
202 		a.key = args->arg1;
203 		a.msgflg = args->arg2;
204 		return (linux_msgget(td, &a));
205 	}
206 	case LINUX_MSGCTL: {
207 		struct linux_msgctl_args a;
208 
209 		a.msqid = args->arg1;
210 		a.cmd = args->arg2;
211 		a.buf = args->ptr;
212 		return (linux_msgctl(td, &a));
213 	}
214 	case LINUX_SHMAT: {
215 		struct linux_shmat_args a;
216 
217 		a.shmid = args->arg1;
218 		a.shmaddr = args->ptr;
219 		a.shmflg = args->arg2;
220 		a.raddr = (l_ulong *)args->arg3;
221 		return (linux_shmat(td, &a));
222 	}
223 	case LINUX_SHMDT: {
224 		struct linux_shmdt_args a;
225 
226 		a.shmaddr = args->ptr;
227 		return (linux_shmdt(td, &a));
228 	}
229 	case LINUX_SHMGET: {
230 		struct linux_shmget_args a;
231 
232 		a.key = args->arg1;
233 		a.size = args->arg2;
234 		a.shmflg = args->arg3;
235 		return (linux_shmget(td, &a));
236 	}
237 	case LINUX_SHMCTL: {
238 		struct linux_shmctl_args a;
239 
240 		a.shmid = args->arg1;
241 		a.cmd = args->arg2;
242 		a.buf = args->ptr;
243 		return (linux_shmctl(td, &a));
244 	}
245 	default:
246 		break;
247 	}
248 
249 	return (EINVAL);
250 }
251 
252 int
253 linux_old_select(struct thread *td, struct linux_old_select_args *args)
254 {
255 	struct l_old_select_argv linux_args;
256 	struct linux_select_args newsel;
257 	int error;
258 
259 #ifdef DEBUG
260 	if (ldebug(old_select))
261 		printf(ARGS(old_select, "%p"), args->ptr);
262 #endif
263 
264 	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
265 	if (error)
266 		return (error);
267 
268 	newsel.nfds = linux_args.nfds;
269 	newsel.readfds = linux_args.readfds;
270 	newsel.writefds = linux_args.writefds;
271 	newsel.exceptfds = linux_args.exceptfds;
272 	newsel.timeout = linux_args.timeout;
273 	return (linux_select(td, &newsel));
274 }
275 
276 int
277 linux_fork(struct thread *td, struct linux_fork_args *args)
278 {
279 	int error;
280 
281 #ifdef DEBUG
282 	if (ldebug(fork))
283 		printf(ARGS(fork, ""));
284 #endif
285 
286 	if ((error = fork(td, (struct fork_args *)args)) != 0)
287 		return (error);
288 
289 	if (td->td_retval[1] == 1)
290 		td->td_retval[0] = 0;
291 	return (0);
292 }
293 
294 int
295 linux_vfork(struct thread *td, struct linux_vfork_args *args)
296 {
297 	int error;
298 
299 #ifdef DEBUG
300 	if (ldebug(vfork))
301 		printf(ARGS(vfork, ""));
302 #endif
303 
304 	if ((error = vfork(td, (struct vfork_args *)args)) != 0)
305 		return (error);
306 	/* Are we the child? */
307 	if (td->td_retval[1] == 1)
308 		td->td_retval[0] = 0;
309 	return (0);
310 }
311 
/*
 * Linux clone(2) flag bits as interpreted by linux_clone().  The low
 * byte of the flags word is not a flag; linux_clone() extracts the
 * child's exit signal from it.
 */
#define CLONE_VM	0x100		/* mapped to RFMEM */
#define CLONE_FS	0x200		/* only used via THREADING_FLAGS */
#define CLONE_FILES	0x400		/* when clear, RFFDG is requested */
#define CLONE_SIGHAND	0x800		/* mapped to RFSIGSHARE */
#define CLONE_PID	0x1000		/* reported as unsupported in DEBUG */
#define CLONE_THREAD	0x10000		/* not consulted by linux_clone() */

/*
 * Exact flag combination that linux_clone() treats as "this is a thread
 * of the same threading group" (adds RFTHREAD).
 */
#define THREADING_FLAGS	(CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
320 
321 int
322 linux_clone(struct thread *td, struct linux_clone_args *args)
323 {
324 	int error, ff = RFPROC | RFSTOPPED;
325 	struct proc *p2;
326 	struct thread *td2;
327 	int exit_signal;
328 
329 #ifdef DEBUG
330 	if (ldebug(clone)) {
331 		printf(ARGS(clone, "flags %x, stack %x"),
332 		    (unsigned int)args->flags, (unsigned int)args->stack);
333 		if (args->flags & CLONE_PID)
334 			printf(LMSG("CLONE_PID not yet supported"));
335 	}
336 #endif
337 
338 	if (!args->stack)
339 		return (EINVAL);
340 
341 	exit_signal = args->flags & 0x000000ff;
342 	if (exit_signal >= LINUX_NSIG)
343 		return (EINVAL);
344 
345 	if (exit_signal <= LINUX_SIGTBLSZ)
346 		exit_signal = linux_to_bsd_signal[_SIG_IDX(exit_signal)];
347 
348 	if (args->flags & CLONE_VM)
349 		ff |= RFMEM;
350 	if (args->flags & CLONE_SIGHAND)
351 		ff |= RFSIGSHARE;
352 	if (!(args->flags & CLONE_FILES))
353 		ff |= RFFDG;
354 
355 	/*
356 	 * Attempt to detect when linux_clone(2) is used for creating
357 	 * kernel threads. Unfortunately despite the existence of the
358 	 * CLONE_THREAD flag, version of linuxthreads package used in
359 	 * most popular distros as of beginning of 2005 doesn't make
360 	 * any use of it. Therefore, this detection relay fully on
361 	 * empirical observation that linuxthreads sets certain
362 	 * combination of flags, so that we can make more or less
363 	 * precise detection and notify the FreeBSD kernel that several
364 	 * processes are in fact part of the same threading group, so
365 	 * that special treatment is necessary for signal delivery
366 	 * between those processes and fd locking.
367 	 */
368 	if ((args->flags & 0xffffff00) == THREADING_FLAGS)
369 		ff |= RFTHREAD;
370 
371 	error = fork1(td, ff, 0, &p2);
372 	if (error)
373 		return (error);
374 
375 
376 	PROC_LOCK(p2);
377 	p2->p_sigparent = exit_signal;
378 	PROC_UNLOCK(p2);
379 	td2 = FIRST_THREAD_IN_PROC(p2);
380 	td2->td_frame->tf_esp = (unsigned int)args->stack;
381 
382 #ifdef DEBUG
383 	if (ldebug(clone))
384 		printf(LMSG("clone: successful rfork to %ld, stack %p sig = %d"),
385 		    (long)p2->p_pid, args->stack, exit_signal);
386 #endif
387 
388 	/*
389 	 * Make this runnable after we are finished with it.
390 	 */
391 	mtx_lock_spin(&sched_lock);
392 	TD_SET_CAN_RUN(td2);
393 	setrunqueue(td2, SRQ_BORING);
394 	mtx_unlock_spin(&sched_lock);
395 
396 	td->td_retval[0] = p2->p_pid;
397 	td->td_retval[1] = 0;
398 	return (0);
399 }
400 
/* XXX move */
/*
 * Argument block for Linux mmap.  linux_mmap() copies it in verbatim
 * from user space, so the layout must not be changed; linux_mmap2()
 * fills it from its register arguments.  Both then call
 * linux_mmap_common().
 */
struct l_mmap_argv {
	l_caddr_t	addr;	/* requested address */
	l_int		len;	/* length of the mapping */
	l_int		prot;	/* protection bits */
	l_int		flags;	/* LINUX_MAP_* flags */
	l_int		fd;	/* file descriptor (ignored for LINUX_MAP_ANON) */
	l_int		pos;	/* byte offset into the file */
};
410 
/*
 * Limits applied by linux_mmap_common() to LINUX_MAP_GROWSDOWN
 * mappings: the stack region is sized to at least
 * (STACK_SIZE - GUARD_SIZE) bytes.
 */
#define STACK_SIZE  (2 * 1024 * 1024)
#define GUARD_SIZE  (4 * PAGE_SIZE)

/* Shared back end of linux_mmap() and linux_mmap2(). */
static int linux_mmap_common(struct thread *, struct l_mmap_argv *);
415 
416 int
417 linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
418 {
419 	struct l_mmap_argv linux_args;
420 
421 #ifdef DEBUG
422 	if (ldebug(mmap2))
423 		printf(ARGS(mmap2, "%p, %d, %d, 0x%08x, %d, %d"),
424 		    (void *)args->addr, args->len, args->prot,
425 		    args->flags, args->fd, args->pgoff);
426 #endif
427 
428 	linux_args.addr = (l_caddr_t)args->addr;
429 	linux_args.len = args->len;
430 	linux_args.prot = args->prot;
431 	linux_args.flags = args->flags;
432 	linux_args.fd = args->fd;
433 	linux_args.pos = args->pgoff * PAGE_SIZE;
434 
435 	return (linux_mmap_common(td, &linux_args));
436 }
437 
438 int
439 linux_mmap(struct thread *td, struct linux_mmap_args *args)
440 {
441 	int error;
442 	struct l_mmap_argv linux_args;
443 
444 	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
445 	if (error)
446 		return (error);
447 
448 #ifdef DEBUG
449 	if (ldebug(mmap))
450 		printf(ARGS(mmap, "%p, %d, %d, 0x%08x, %d, %d"),
451 		    (void *)linux_args.addr, linux_args.len, linux_args.prot,
452 		    linux_args.flags, linux_args.fd, linux_args.pos);
453 #endif
454 
455 	return (linux_mmap_common(td, &linux_args));
456 }
457 
458 static int
459 linux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args)
460 {
461 	struct proc *p = td->td_proc;
462 	struct mmap_args /* {
463 		caddr_t addr;
464 		size_t len;
465 		int prot;
466 		int flags;
467 		int fd;
468 		long pad;
469 		off_t pos;
470 	} */ bsd_args;
471 	int error;
472 
473 	error = 0;
474 	bsd_args.flags = 0;
475 	if (linux_args->flags & LINUX_MAP_SHARED)
476 		bsd_args.flags |= MAP_SHARED;
477 	if (linux_args->flags & LINUX_MAP_PRIVATE)
478 		bsd_args.flags |= MAP_PRIVATE;
479 	if (linux_args->flags & LINUX_MAP_FIXED)
480 		bsd_args.flags |= MAP_FIXED;
481 	if (linux_args->flags & LINUX_MAP_ANON)
482 		bsd_args.flags |= MAP_ANON;
483 	else
484 		bsd_args.flags |= MAP_NOSYNC;
485 	if (linux_args->flags & LINUX_MAP_GROWSDOWN) {
486 		bsd_args.flags |= MAP_STACK;
487 
488 		/* The linux MAP_GROWSDOWN option does not limit auto
489 		 * growth of the region.  Linux mmap with this option
490 		 * takes as addr the inital BOS, and as len, the initial
491 		 * region size.  It can then grow down from addr without
492 		 * limit.  However, linux threads has an implicit internal
493 		 * limit to stack size of STACK_SIZE.  Its just not
494 		 * enforced explicitly in linux.  But, here we impose
495 		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
496 		 * region, since we can do this with our mmap.
497 		 *
498 		 * Our mmap with MAP_STACK takes addr as the maximum
499 		 * downsize limit on BOS, and as len the max size of
500 		 * the region.  It them maps the top SGROWSIZ bytes,
501 		 * and autgrows the region down, up to the limit
502 		 * in addr.
503 		 *
504 		 * If we don't use the MAP_STACK option, the effect
505 		 * of this code is to allocate a stack region of a
506 		 * fixed size of (STACK_SIZE - GUARD_SIZE).
507 		 */
508 
509 		/* This gives us TOS */
510 		bsd_args.addr = linux_args->addr + linux_args->len;
511 
512 		if (bsd_args.addr > p->p_vmspace->vm_maxsaddr) {
513 			/* Some linux apps will attempt to mmap
514 			 * thread stacks near the top of their
515 			 * address space.  If their TOS is greater
516 			 * than vm_maxsaddr, vm_map_growstack()
517 			 * will confuse the thread stack with the
518 			 * process stack and deliver a SEGV if they
519 			 * attempt to grow the thread stack past their
520 			 * current stacksize rlimit.  To avoid this,
521 			 * adjust vm_maxsaddr upwards to reflect
522 			 * the current stacksize rlimit rather
523 			 * than the maximum possible stacksize.
524 			 * It would be better to adjust the
525 			 * mmap'ed region, but some apps do not check
526 			 * mmap's return value.
527 			 */
528 			PROC_LOCK(p);
529 			p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
530 			    lim_cur(p, RLIMIT_STACK);
531 			PROC_UNLOCK(p);
532 		}
533 
534 		/* This gives us our maximum stack size */
535 		if (linux_args->len > STACK_SIZE - GUARD_SIZE)
536 			bsd_args.len = linux_args->len;
537 		else
538 			bsd_args.len  = STACK_SIZE - GUARD_SIZE;
539 
540 		/* This gives us a new BOS.  If we're using VM_STACK, then
541 		 * mmap will just map the top SGROWSIZ bytes, and let
542 		 * the stack grow down to the limit at BOS.  If we're
543 		 * not using VM_STACK we map the full stack, since we
544 		 * don't have a way to autogrow it.
545 		 */
546 		bsd_args.addr -= bsd_args.len;
547 	} else {
548 		bsd_args.addr = linux_args->addr;
549 		bsd_args.len  = linux_args->len;
550 	}
551 
552 	bsd_args.prot = linux_args->prot | PROT_READ;	/* always required */
553 	if (linux_args->flags & LINUX_MAP_ANON)
554 		bsd_args.fd = -1;
555 	else
556 		bsd_args.fd = linux_args->fd;
557 	bsd_args.pos = linux_args->pos;
558 	bsd_args.pad = 0;
559 
560 #ifdef DEBUG
561 	if (ldebug(mmap))
562 		printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
563 		    __func__,
564 		    (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
565 		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
566 #endif
567 	error = mmap(td, &bsd_args);
568 #ifdef DEBUG
569 	if (ldebug(mmap))
570 		printf("-> %s() return: 0x%x (0x%08x)\n",
571 			__func__, error, (u_int)td->td_retval[0]);
572 #endif
573 	return (error);
574 }
575 
576 int
577 linux_pipe(struct thread *td, struct linux_pipe_args *args)
578 {
579 	int error;
580 	int reg_edx;
581 
582 #ifdef DEBUG
583 	if (ldebug(pipe))
584 		printf(ARGS(pipe, "*"));
585 #endif
586 
587 	reg_edx = td->td_retval[1];
588 	error = pipe(td, 0);
589 	if (error) {
590 		td->td_retval[1] = reg_edx;
591 		return (error);
592 	}
593 
594 	error = copyout(td->td_retval, args->pipefds, 2*sizeof(int));
595 	if (error) {
596 		td->td_retval[1] = reg_edx;
597 		return (error);
598 	}
599 
600 	td->td_retval[1] = reg_edx;
601 	td->td_retval[0] = 0;
602 	return (0);
603 }
604 
605 int
606 linux_ioperm(struct thread *td, struct linux_ioperm_args *args)
607 {
608 	int error;
609 	struct i386_ioperm_args iia;
610 
611 	iia.start = args->start;
612 	iia.length = args->length;
613 	iia.enable = args->enable;
614 	mtx_lock(&Giant);
615 	error = i386_set_ioperm(td, &iia);
616 	mtx_unlock(&Giant);
617 	return (error);
618 }
619 
620 int
621 linux_iopl(struct thread *td, struct linux_iopl_args *args)
622 {
623 	int error;
624 
625 	if (args->level < 0 || args->level > 3)
626 		return (EINVAL);
627 	if ((error = suser(td)) != 0)
628 		return (error);
629 	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
630 		return (error);
631 	td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) |
632 	    (args->level * (PSL_IOPL / 3));
633 	return (0);
634 }
635 
636 int
637 linux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap)
638 {
639 	int error;
640 	struct i386_ldt_args ldt;
641 	struct l_descriptor ld;
642 	union descriptor desc;
643 
644 	if (uap->ptr == NULL)
645 		return (EINVAL);
646 
647 	switch (uap->func) {
648 	case 0x00: /* read_ldt */
649 		ldt.start = 0;
650 		ldt.descs = uap->ptr;
651 		ldt.num = uap->bytecount / sizeof(union descriptor);
652 		mtx_lock(&Giant);
653 		error = i386_get_ldt(td, &ldt);
654 		td->td_retval[0] *= sizeof(union descriptor);
655 		mtx_unlock(&Giant);
656 		break;
657 	case 0x01: /* write_ldt */
658 	case 0x11: /* write_ldt */
659 		if (uap->bytecount != sizeof(ld))
660 			return (EINVAL);
661 
662 		error = copyin(uap->ptr, &ld, sizeof(ld));
663 		if (error)
664 			return (error);
665 
666 		ldt.start = ld.entry_number;
667 		ldt.descs = &desc;
668 		ldt.num = 1;
669 		desc.sd.sd_lolimit = (ld.limit & 0x0000ffff);
670 		desc.sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16;
671 		desc.sd.sd_lobase = (ld.base_addr & 0x00ffffff);
672 		desc.sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24;
673 		desc.sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) |
674 			(ld.contents << 2);
675 		desc.sd.sd_dpl = 3;
676 		desc.sd.sd_p = (ld.seg_not_present ^ 1);
677 		desc.sd.sd_xx = 0;
678 		desc.sd.sd_def32 = ld.seg_32bit;
679 		desc.sd.sd_gran = ld.limit_in_pages;
680 		mtx_lock(&Giant);
681 		error = i386_set_ldt(td, &ldt, &desc);
682 		mtx_unlock(&Giant);
683 		break;
684 	default:
685 		error = EINVAL;
686 		break;
687 	}
688 
689 	if (error == EOPNOTSUPP) {
690 		printf("linux: modify_ldt needs kernel option USER_LDT\n");
691 		error = ENOSYS;
692 	}
693 
694 	return (error);
695 }
696 
697 int
698 linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
699 {
700 	l_osigaction_t osa;
701 	l_sigaction_t act, oact;
702 	int error;
703 
704 #ifdef DEBUG
705 	if (ldebug(sigaction))
706 		printf(ARGS(sigaction, "%d, %p, %p"),
707 		    args->sig, (void *)args->nsa, (void *)args->osa);
708 #endif
709 
710 	if (args->nsa != NULL) {
711 		error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
712 		if (error)
713 			return (error);
714 		act.lsa_handler = osa.lsa_handler;
715 		act.lsa_flags = osa.lsa_flags;
716 		act.lsa_restorer = osa.lsa_restorer;
717 		LINUX_SIGEMPTYSET(act.lsa_mask);
718 		act.lsa_mask.__bits[0] = osa.lsa_mask;
719 	}
720 
721 	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
722 	    args->osa ? &oact : NULL);
723 
724 	if (args->osa != NULL && !error) {
725 		osa.lsa_handler = oact.lsa_handler;
726 		osa.lsa_flags = oact.lsa_flags;
727 		osa.lsa_restorer = oact.lsa_restorer;
728 		osa.lsa_mask = oact.lsa_mask.__bits[0];
729 		error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
730 	}
731 
732 	return (error);
733 }
734 
735 /*
736  * Linux has two extra args, restart and oldmask.  We dont use these,
737  * but it seems that "restart" is actually a context pointer that
738  * enables the signal to happen with a different register set.
739  */
740 int
741 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
742 {
743 	sigset_t sigmask;
744 	l_sigset_t mask;
745 
746 #ifdef DEBUG
747 	if (ldebug(sigsuspend))
748 		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
749 #endif
750 
751 	LINUX_SIGEMPTYSET(mask);
752 	mask.__bits[0] = args->mask;
753 	linux_to_bsd_sigset(&mask, &sigmask);
754 	return (kern_sigsuspend(td, sigmask));
755 }
756 
757 int
758 linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
759 {
760 	l_sigset_t lmask;
761 	sigset_t sigmask;
762 	int error;
763 
764 #ifdef DEBUG
765 	if (ldebug(rt_sigsuspend))
766 		printf(ARGS(rt_sigsuspend, "%p, %d"),
767 		    (void *)uap->newset, uap->sigsetsize);
768 #endif
769 
770 	if (uap->sigsetsize != sizeof(l_sigset_t))
771 		return (EINVAL);
772 
773 	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
774 	if (error)
775 		return (error);
776 
777 	linux_to_bsd_sigset(&lmask, &sigmask);
778 	return (kern_sigsuspend(td, sigmask));
779 }
780 
781 int
782 linux_pause(struct thread *td, struct linux_pause_args *args)
783 {
784 	struct proc *p = td->td_proc;
785 	sigset_t sigmask;
786 
787 #ifdef DEBUG
788 	if (ldebug(pause))
789 		printf(ARGS(pause, ""));
790 #endif
791 
792 	PROC_LOCK(p);
793 	sigmask = td->td_sigmask;
794 	PROC_UNLOCK(p);
795 	return (kern_sigsuspend(td, sigmask));
796 }
797 
798 int
799 linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
800 {
801 	stack_t ss, oss;
802 	l_stack_t lss;
803 	int error;
804 
805 #ifdef DEBUG
806 	if (ldebug(sigaltstack))
807 		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
808 #endif
809 
810 	if (uap->uss != NULL) {
811 		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
812 		if (error)
813 			return (error);
814 
815 		ss.ss_sp = lss.ss_sp;
816 		ss.ss_size = lss.ss_size;
817 		ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
818 	}
819 	error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
820 	    (uap->uoss != NULL) ? &oss : NULL);
821 	if (!error && uap->uoss != NULL) {
822 		lss.ss_sp = oss.ss_sp;
823 		lss.ss_size = oss.ss_size;
824 		lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
825 		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
826 	}
827 
828 	return (error);
829 }
830 
831 int
832 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
833 {
834 	struct ftruncate_args sa;
835 
836 #ifdef DEBUG
837 	if (ldebug(ftruncate64))
838 		printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
839 		    (intmax_t)args->length);
840 #endif
841 
842 	sa.fd = args->fd;
843 	sa.pad = 0;
844 	sa.length = args->length;
845 	return ftruncate(td, &sa);
846 }
847 
848 int
849 linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args)
850 {
851 	/*
852 	 * Return an error code instead of raising a SIGSYS so that
853 	 * the caller will fall back to simpler LDT methods.
854 	 */
855 	return (ENOSYS);
856 }
857 
858 int
859 linux_gettid(struct thread *td, struct linux_gettid_args *args)
860 {
861 
862 	td->td_retval[0] = td->td_proc->p_pid;
863 	return (0);
864 }
865 
/*
 * Linux tkill(2): handled by linux_kill(), with the argument structure
 * reinterpreted as a linux_kill_args.
 * NOTE(review): this relies on both argument structures sharing the
 * same leading layout — confirm against linux_proto.h.
 */
int
linux_tkill(struct thread *td, struct linux_tkill_args *args)
{
	struct linux_kill_args *kill_args;

	kill_args = (struct linux_kill_args *)args;
	return (linux_kill(td, kill_args));
}
872 
873