xref: /freebsd/sys/amd64/linux32/linux32_machdep.c (revision aa64588d28258aef88cc33b8043112e8856948d0)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2002 Doug Rabson
4  * Copyright (c) 2000 Marcel Moolenaar
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer
12  *    in this position and unchanged.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/kernel.h>
36 #include <sys/systm.h>
37 #include <sys/file.h>
38 #include <sys/fcntl.h>
39 #include <sys/clock.h>
40 #include <sys/imgact.h>
41 #include <sys/limits.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mman.h>
45 #include <sys/mutex.h>
46 #include <sys/priv.h>
47 #include <sys/proc.h>
48 #include <sys/resource.h>
49 #include <sys/resourcevar.h>
50 #include <sys/sched.h>
51 #include <sys/syscallsubr.h>
52 #include <sys/sysproto.h>
53 #include <sys/unistd.h>
54 
55 #include <machine/frame.h>
56 #include <machine/pcb.h>
57 #include <machine/psl.h>
58 #include <machine/segments.h>
59 #include <machine/specialreg.h>
60 
61 #include <vm/vm.h>
62 #include <vm/pmap.h>
63 #include <vm/vm_extern.h>
64 #include <vm/vm_kern.h>
65 #include <vm/vm_map.h>
66 
67 #include <amd64/linux32/linux.h>
68 #include <amd64/linux32/linux32_proto.h>
69 #include <compat/linux/linux_ipc.h>
70 #include <compat/linux/linux_signal.h>
71 #include <compat/linux/linux_util.h>
72 #include <compat/linux/linux_emul.h>
73 
/*
 * Argument block for the old-style Linux select call.  linux_old_select()
 * copies one of these in from the single user pointer it receives and
 * converts each l_uintptr_t member with PTRIN() before calling
 * linux_select().
 */
74 struct l_old_select_argv {
75 	l_int		nfds;		/* forwarded as newsel.nfds */
76 	l_uintptr_t	readfds;	/* user address, converted with PTRIN() */
77 	l_uintptr_t	writefds;	/* user address, converted with PTRIN() */
78 	l_uintptr_t	exceptfds;	/* user address, converted with PTRIN() */
79 	l_uintptr_t	timeout;	/* user address, converted with PTRIN() */
80 } __packed;
81 
82 int
83 linux_to_bsd_sigaltstack(int lsa)
84 {
85 	int bsa = 0;
86 
87 	if (lsa & LINUX_SS_DISABLE)
88 		bsa |= SS_DISABLE;
89 	if (lsa & LINUX_SS_ONSTACK)
90 		bsa |= SS_ONSTACK;
91 	return (bsa);
92 }
93 
/*
 * Shared backend used by both linux_mmap() and linux_mmap2(); the
 * definition appears further down in this file.
 */
94 static int	linux_mmap_common(struct thread *td, l_uintptr_t addr,
95 		    l_size_t len, l_int prot, l_int flags, l_int fd,
96 		    l_loff_t pos);
97 
98 int
99 bsd_to_linux_sigaltstack(int bsa)
100 {
101 	int lsa = 0;
102 
103 	if (bsa & SS_DISABLE)
104 		lsa |= LINUX_SS_DISABLE;
105 	if (bsa & SS_ONSTACK)
106 		lsa |= LINUX_SS_ONSTACK;
107 	return (lsa);
108 }
109 
110 /*
111  * Custom version of exec_copyin_args() so that we can translate
112  * the pointers.
113  */
114 static int
115 linux_exec_copyin_args(struct image_args *args, char *fname,
116     enum uio_seg segflg, char **argv, char **envv)
117 {
118 	char *argp, *envp;
119 	u_int32_t *p32, arg;
120 	size_t length;
121 	int error;
122 
123 	bzero(args, sizeof(*args));
124 	if (argv == NULL)
125 		return (EFAULT);
126 
127 	/*
128 	 * Allocate temporary demand zeroed space for argument and
129 	 *	environment strings
130 	 */
131 	args->buf = (char *)kmem_alloc_wait(exec_map,
132 	    PATH_MAX + ARG_MAX + MAXSHELLCMDLEN);
133 	if (args->buf == NULL)
134 		return (ENOMEM);
135 	args->begin_argv = args->buf;
136 	args->endp = args->begin_argv;
137 	args->stringspace = ARG_MAX;
138 
139 	args->fname = args->buf + ARG_MAX;
140 
141 	/*
142 	 * Copy the file name.
143 	 */
144 	error = (segflg == UIO_SYSSPACE) ?
145 	    copystr(fname, args->fname, PATH_MAX, &length) :
146 	    copyinstr(fname, args->fname, PATH_MAX, &length);
147 	if (error != 0)
148 		goto err_exit;
149 
150 	/*
151 	 * extract arguments first
152 	 */
153 	p32 = (u_int32_t *)argv;
154 	for (;;) {
155 		error = copyin(p32++, &arg, sizeof(arg));
156 		if (error)
157 			goto err_exit;
158 		if (arg == 0)
159 			break;
160 		argp = PTRIN(arg);
161 		error = copyinstr(argp, args->endp, args->stringspace, &length);
162 		if (error) {
163 			if (error == ENAMETOOLONG)
164 				error = E2BIG;
165 
166 			goto err_exit;
167 		}
168 		args->stringspace -= length;
169 		args->endp += length;
170 		args->argc++;
171 	}
172 
173 	args->begin_envv = args->endp;
174 
175 	/*
176 	 * extract environment strings
177 	 */
178 	if (envv) {
179 		p32 = (u_int32_t *)envv;
180 		for (;;) {
181 			error = copyin(p32++, &arg, sizeof(arg));
182 			if (error)
183 				goto err_exit;
184 			if (arg == 0)
185 				break;
186 			envp = PTRIN(arg);
187 			error = copyinstr(envp, args->endp, args->stringspace,
188 			    &length);
189 			if (error) {
190 				if (error == ENAMETOOLONG)
191 					error = E2BIG;
192 				goto err_exit;
193 			}
194 			args->stringspace -= length;
195 			args->endp += length;
196 			args->envc++;
197 		}
198 	}
199 
200 	return (0);
201 
202 err_exit:
203 	kmem_free_wakeup(exec_map, (vm_offset_t)args->buf,
204 	    PATH_MAX + ARG_MAX + MAXSHELLCMDLEN);
205 	args->buf = NULL;
206 	return (error);
207 }
208 
209 int
210 linux_execve(struct thread *td, struct linux_execve_args *args)
211 {
212 	struct image_args eargs;
213 	char *path;
214 	int error;
215 
216 	LCONVPATHEXIST(td, args->path, &path);
217 
218 #ifdef DEBUG
219 	if (ldebug(execve))
220 		printf(ARGS(execve, "%s"), path);
221 #endif
222 
223 	error = linux_exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp,
224 	    args->envp);
225 	free(path, M_TEMP);
226 	if (error == 0)
227 		error = kern_execve(td, &eargs, NULL);
228 	if (error == 0)
229 		/* Linux process can execute FreeBSD one, do not attempt
230 		 * to create emuldata for such process using
231 		 * linux_proc_init, this leads to a panic on KASSERT
232 		 * because such process has p->p_emuldata == NULL.
233 		 */
234 	   	if (td->td_proc->p_sysent == &elf_linux_sysvec)
235 			error = linux_proc_init(td, 0, 0);
236 	return (error);
237 }
238 
/*
 * The iovec copy-in helpers in this file read user memory one
 * struct l_iovec32 at a time; assert that the structure is 8 bytes.
 */
239 CTASSERT(sizeof(struct l_iovec32) == 8);
240 
241 static int
242 linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop)
243 {
244 	struct l_iovec32 iov32;
245 	struct iovec *iov;
246 	struct uio *uio;
247 	uint32_t iovlen;
248 	int error, i;
249 
250 	*uiop = NULL;
251 	if (iovcnt > UIO_MAXIOV)
252 		return (EINVAL);
253 	iovlen = iovcnt * sizeof(struct iovec);
254 	uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK);
255 	iov = (struct iovec *)(uio + 1);
256 	for (i = 0; i < iovcnt; i++) {
257 		error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32));
258 		if (error) {
259 			free(uio, M_IOV);
260 			return (error);
261 		}
262 		iov[i].iov_base = PTRIN(iov32.iov_base);
263 		iov[i].iov_len = iov32.iov_len;
264 	}
265 	uio->uio_iov = iov;
266 	uio->uio_iovcnt = iovcnt;
267 	uio->uio_segflg = UIO_USERSPACE;
268 	uio->uio_offset = -1;
269 	uio->uio_resid = 0;
270 	for (i = 0; i < iovcnt; i++) {
271 		if (iov->iov_len > INT_MAX - uio->uio_resid) {
272 			free(uio, M_IOV);
273 			return (EINVAL);
274 		}
275 		uio->uio_resid += iov->iov_len;
276 		iov++;
277 	}
278 	*uiop = uio;
279 	return (0);
280 }
281 
282 int
283 linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp,
284     int error)
285 {
286 	struct l_iovec32 iov32;
287 	struct iovec *iov;
288 	uint32_t iovlen;
289 	int i;
290 
291 	*iovp = NULL;
292 	if (iovcnt > UIO_MAXIOV)
293 		return (error);
294 	iovlen = iovcnt * sizeof(struct iovec);
295 	iov = malloc(iovlen, M_IOV, M_WAITOK);
296 	for (i = 0; i < iovcnt; i++) {
297 		error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32));
298 		if (error) {
299 			free(iov, M_IOV);
300 			return (error);
301 		}
302 		iov[i].iov_base = PTRIN(iov32.iov_base);
303 		iov[i].iov_len = iov32.iov_len;
304 	}
305 	*iovp = iov;
306 	return(0);
307 
308 }
309 
310 int
311 linux_readv(struct thread *td, struct linux_readv_args *uap)
312 {
313 	struct uio *auio;
314 	int error;
315 
316 	error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
317 	if (error)
318 		return (error);
319 	error = kern_readv(td, uap->fd, auio);
320 	free(auio, M_IOV);
321 	return (error);
322 }
323 
324 int
325 linux_writev(struct thread *td, struct linux_writev_args *uap)
326 {
327 	struct uio *auio;
328 	int error;
329 
330 	error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio);
331 	if (error)
332 		return (error);
333 	error = kern_writev(td, uap->fd, auio);
334 	free(auio, M_IOV);
335 	return (error);
336 }
337 
/*
 * Indirect argument block read by linux_ipc() for LINUX_MSGRCV when the
 * upper 16 bits of "what" are zero: the message buffer address and the
 * message type are fetched from user memory through this structure.
 */
338 struct l_ipc_kludge {
339 	l_uintptr_t msgp;	/* user address, converted with PTRIN() */
340 	l_long msgtyp;		/* becomes linux_msgrcv_args.msgtyp */
341 } __packed;
342 
343 int
344 linux_ipc(struct thread *td, struct linux_ipc_args *args)
345 {
346 
347 	switch (args->what & 0xFFFF) {
348 	case LINUX_SEMOP: {
349 		struct linux_semop_args a;
350 
351 		a.semid = args->arg1;
352 		a.tsops = args->ptr;
353 		a.nsops = args->arg2;
354 		return (linux_semop(td, &a));
355 	}
356 	case LINUX_SEMGET: {
357 		struct linux_semget_args a;
358 
359 		a.key = args->arg1;
360 		a.nsems = args->arg2;
361 		a.semflg = args->arg3;
362 		return (linux_semget(td, &a));
363 	}
364 	case LINUX_SEMCTL: {
365 		struct linux_semctl_args a;
366 		int error;
367 
368 		a.semid = args->arg1;
369 		a.semnum = args->arg2;
370 		a.cmd = args->arg3;
371 		error = copyin(args->ptr, &a.arg, sizeof(a.arg));
372 		if (error)
373 			return (error);
374 		return (linux_semctl(td, &a));
375 	}
376 	case LINUX_MSGSND: {
377 		struct linux_msgsnd_args a;
378 
379 		a.msqid = args->arg1;
380 		a.msgp = args->ptr;
381 		a.msgsz = args->arg2;
382 		a.msgflg = args->arg3;
383 		return (linux_msgsnd(td, &a));
384 	}
385 	case LINUX_MSGRCV: {
386 		struct linux_msgrcv_args a;
387 
388 		a.msqid = args->arg1;
389 		a.msgsz = args->arg2;
390 		a.msgflg = args->arg3;
391 		if ((args->what >> 16) == 0) {
392 			struct l_ipc_kludge tmp;
393 			int error;
394 
395 			if (args->ptr == 0)
396 				return (EINVAL);
397 			error = copyin(args->ptr, &tmp, sizeof(tmp));
398 			if (error)
399 				return (error);
400 			a.msgp = PTRIN(tmp.msgp);
401 			a.msgtyp = tmp.msgtyp;
402 		} else {
403 			a.msgp = args->ptr;
404 			a.msgtyp = args->arg5;
405 		}
406 		return (linux_msgrcv(td, &a));
407 	}
408 	case LINUX_MSGGET: {
409 		struct linux_msgget_args a;
410 
411 		a.key = args->arg1;
412 		a.msgflg = args->arg2;
413 		return (linux_msgget(td, &a));
414 	}
415 	case LINUX_MSGCTL: {
416 		struct linux_msgctl_args a;
417 
418 		a.msqid = args->arg1;
419 		a.cmd = args->arg2;
420 		a.buf = args->ptr;
421 		return (linux_msgctl(td, &a));
422 	}
423 	case LINUX_SHMAT: {
424 		struct linux_shmat_args a;
425 
426 		a.shmid = args->arg1;
427 		a.shmaddr = args->ptr;
428 		a.shmflg = args->arg2;
429 		a.raddr = PTRIN((l_uint)args->arg3);
430 		return (linux_shmat(td, &a));
431 	}
432 	case LINUX_SHMDT: {
433 		struct linux_shmdt_args a;
434 
435 		a.shmaddr = args->ptr;
436 		return (linux_shmdt(td, &a));
437 	}
438 	case LINUX_SHMGET: {
439 		struct linux_shmget_args a;
440 
441 		a.key = args->arg1;
442 		a.size = args->arg2;
443 		a.shmflg = args->arg3;
444 		return (linux_shmget(td, &a));
445 	}
446 	case LINUX_SHMCTL: {
447 		struct linux_shmctl_args a;
448 
449 		a.shmid = args->arg1;
450 		a.cmd = args->arg2;
451 		a.buf = args->ptr;
452 		return (linux_shmctl(td, &a));
453 	}
454 	default:
455 		break;
456 	}
457 
458 	return (EINVAL);
459 }
460 
461 int
462 linux_old_select(struct thread *td, struct linux_old_select_args *args)
463 {
464 	struct l_old_select_argv linux_args;
465 	struct linux_select_args newsel;
466 	int error;
467 
468 #ifdef DEBUG
469 	if (ldebug(old_select))
470 		printf(ARGS(old_select, "%p"), args->ptr);
471 #endif
472 
473 	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
474 	if (error)
475 		return (error);
476 
477 	newsel.nfds = linux_args.nfds;
478 	newsel.readfds = PTRIN(linux_args.readfds);
479 	newsel.writefds = PTRIN(linux_args.writefds);
480 	newsel.exceptfds = PTRIN(linux_args.exceptfds);
481 	newsel.timeout = PTRIN(linux_args.timeout);
482 	return (linux_select(td, &newsel));
483 }
484 
485 int
486 linux_fork(struct thread *td, struct linux_fork_args *args)
487 {
488 	int error;
489 	struct proc *p2;
490 	struct thread *td2;
491 
492 #ifdef DEBUG
493 	if (ldebug(fork))
494 		printf(ARGS(fork, ""));
495 #endif
496 
497 	if ((error = fork1(td, RFFDG | RFPROC | RFSTOPPED, 0, &p2)) != 0)
498 		return (error);
499 
500 	if (error == 0) {
501 		td->td_retval[0] = p2->p_pid;
502 		td->td_retval[1] = 0;
503 	}
504 
505 	if (td->td_retval[1] == 1)
506 		td->td_retval[0] = 0;
507 	error = linux_proc_init(td, td->td_retval[0], 0);
508 	if (error)
509 		return (error);
510 
511 	td2 = FIRST_THREAD_IN_PROC(p2);
512 
513 	/*
514 	 * Make this runnable after we are finished with it.
515 	 */
516 	thread_lock(td2);
517 	TD_SET_CAN_RUN(td2);
518 	sched_add(td2, SRQ_BORING);
519 	thread_unlock(td2);
520 
521 	return (0);
522 }
523 
524 int
525 linux_vfork(struct thread *td, struct linux_vfork_args *args)
526 {
527 	int error;
528 	struct proc *p2;
529 	struct thread *td2;
530 
531 #ifdef DEBUG
532 	if (ldebug(vfork))
533 		printf(ARGS(vfork, ""));
534 #endif
535 
536 	/* Exclude RFPPWAIT */
537 	if ((error = fork1(td, RFFDG | RFPROC | RFMEM | RFSTOPPED, 0, &p2)) != 0)
538 		return (error);
539 	if (error == 0) {
540 	   	td->td_retval[0] = p2->p_pid;
541 		td->td_retval[1] = 0;
542 	}
543 	/* Are we the child? */
544 	if (td->td_retval[1] == 1)
545 		td->td_retval[0] = 0;
546 	error = linux_proc_init(td, td->td_retval[0], 0);
547 	if (error)
548 		return (error);
549 
550 	PROC_LOCK(p2);
551 	p2->p_flag |= P_PPWAIT;
552 	PROC_UNLOCK(p2);
553 
554 	td2 = FIRST_THREAD_IN_PROC(p2);
555 
556 	/*
557 	 * Make this runnable after we are finished with it.
558 	 */
559 	thread_lock(td2);
560 	TD_SET_CAN_RUN(td2);
561 	sched_add(td2, SRQ_BORING);
562 	thread_unlock(td2);
563 
564 	/* wait for the children to exit, ie. emulate vfork */
565 	PROC_LOCK(p2);
566 	while (p2->p_flag & P_PPWAIT)
567 		cv_wait(&p2->p_pwait, &p2->p_mtx);
568 	PROC_UNLOCK(p2);
569 
570 	return (0);
571 }
572 
/*
 * Linux clone(2).
 *
 * Translates the Linux clone flags into fork1() flags, creates the child
 * stopped (RFSTOPPED), sets up its emulation data, optional TID
 * pointers, stack pointer and TLS segment, and only then makes the
 * child runnable.  With LINUX_CLONE_VFORK the parent additionally waits
 * until P_PPWAIT is cleared on the child.
 *
 * Returns 0 with the child's pid in td_retval[0].  Returns EINVAL for
 * an invalid exit signal or for LINUX_CLONE_PARENT_SETTID with a NULL
 * parent_tidptr, or the error from fork1().  Failures of the later
 * copyin()/copyout() calls are only logged.
 */
573 int
574 linux_clone(struct thread *td, struct linux_clone_args *args)
575 {
576 	int error, ff = RFPROC | RFSTOPPED;
577 	struct proc *p2;
578 	struct thread *td2;
579 	int exit_signal;
580 	struct linux_emuldata *em;
581 
582 #ifdef DEBUG
583 	if (ldebug(clone)) {
584 		printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, "
585 		    "child tid: %p"), (unsigned)args->flags,
586 		    args->stack, args->parent_tidptr, args->child_tidptr);
587 	}
588 #endif
589 
	/* The low byte of the flags carries the child's exit signal. */
590 	exit_signal = args->flags & 0x000000ff;
591 	if (LINUX_SIG_VALID(exit_signal)) {
592 		if (exit_signal <= LINUX_SIGTBLSZ)
593 			exit_signal =
594 			    linux_to_bsd_signal[_SIG_IDX(exit_signal)];
595 	} else if (exit_signal != 0)
596 		return (EINVAL);
597 
598 	if (args->flags & LINUX_CLONE_VM)
599 		ff |= RFMEM;
600 	if (args->flags & LINUX_CLONE_SIGHAND)
601 		ff |= RFSIGSHARE;
602 	/*
603 	 * XXX: In Linux, sharing of fs info (chroot/cwd/umask)
604 	 * and open files is independent.  In FreeBSD, it's in one
605 	 * structure but in reality it does not cause any problems
606 	 * because both of these flags are usually set together.
607 	 */
608 	if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS)))
609 		ff |= RFFDG;
610 
611 	/*
612 	 * Attempt to detect when linux_clone(2) is used for creating
613 	 * kernel threads. Unfortunately despite the existence of the
614 	 * CLONE_THREAD flag, version of linuxthreads package used in
615 	 * most popular distros as of beginning of 2005 doesn't make
616 	 * any use of it. Therefore, this detection relies on
617 	 * empirical observation that linuxthreads sets certain
618 	 * combination of flags, so that we can make more or less
619 	 * precise detection and notify the FreeBSD kernel that several
620 	 * processes are in fact part of the same threading group, so
621 	 * that special treatment is necessary for signal delivery
622 	 * between those processes and fd locking.
623 	 */
624 	if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS)
625 		ff |= RFTHREAD;
626 
	/* Reject a parent-TID request without a destination before forking. */
627 	if (args->flags & LINUX_CLONE_PARENT_SETTID)
628 		if (args->parent_tidptr == NULL)
629 			return (EINVAL);
630 
631 	error = fork1(td, ff, 0, &p2);
632 	if (error)
633 		return (error);
634 
	/* Reparent the child to the caller's own parent. */
635 	if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) {
636 	   	sx_xlock(&proctree_lock);
637 		PROC_LOCK(p2);
638 		proc_reparent(p2, td->td_proc->p_pptr);
639 		PROC_UNLOCK(p2);
640 		sx_xunlock(&proctree_lock);
641 	}
642 
643 	/* create the emuldata */
	/* NOTE(review): the value stored in error here is never examined. */
644 	error = linux_proc_init(td, p2->p_pid, args->flags);
645 	/* reference it - no need to check this */
646 	em = em_find(p2, EMUL_DOLOCK);
647 	KASSERT(em != NULL, ("clone: emuldata not found.\n"));
648 	/* and adjust it */
649 
650 	if (args->flags & LINUX_CLONE_THREAD) {
651 #ifdef notyet
652 	   	PROC_LOCK(p2);
653 	   	p2->p_pgrp = td->td_proc->p_pgrp;
654 	   	PROC_UNLOCK(p2);
655 #endif
656 		exit_signal = 0;
657 	}
658 
659 	if (args->flags & LINUX_CLONE_CHILD_SETTID)
660 		em->child_set_tid = args->child_tidptr;
661 	else
662 	   	em->child_set_tid = NULL;
663 
664 	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
665 		em->child_clear_tid = args->child_tidptr;
666 	else
667 	   	em->child_clear_tid = NULL;
668 
	/* Drops the lock requested from em_find() via EMUL_DOLOCK. */
669 	EMUL_UNLOCK(&emul_lock);
670 
	/* A failed copyout of the child's pid is logged, not returned. */
671 	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
672 		error = copyout(&p2->p_pid, args->parent_tidptr,
673 		    sizeof(p2->p_pid));
674 		if (error)
675 			printf(LMSG("copyout failed!"));
676 	}
677 
678 	PROC_LOCK(p2);
679 	p2->p_sigparent = exit_signal;
680 	PROC_UNLOCK(p2);
681 	td2 = FIRST_THREAD_IN_PROC(p2);
682 	/*
683 	 * In a case of stack = NULL, we are supposed to COW calling process
684 	 * stack. This is what normal fork() does, so we just keep tf_rsp arg
685 	 * intact.
686 	 */
687 	if (args->stack)
688 		td2->td_frame->tf_rsp = PTROUT(args->stack);
689 
690 	if (args->flags & LINUX_CLONE_SETTLS) {
691 		struct user_segment_descriptor sd;
692 		struct l_user_desc info;
693 		int a[2];
694 
		/*
		 * The l_user_desc is read from (and written back to) the
		 * user address held in the caller's trap frame tf_rsi.
		 */
695 		error = copyin((void *)td->td_frame->tf_rsi, &info,
696 		    sizeof(struct l_user_desc));
697 		if (error) {
698 			printf(LMSG("copyin failed!"));
699 		} else {
700 			/* We might copy out the entry_number as GUGS32_SEL. */
701 			info.entry_number = GUGS32_SEL;
702 			error = copyout(&info, (void *)td->td_frame->tf_rsi,
703 			    sizeof(struct l_user_desc));
704 			if (error)
705 				printf(LMSG("copyout failed!"));
706 
707 			a[0] = LINUX_LDT_entry_a(&info);
708 			a[1] = LINUX_LDT_entry_b(&info);
709 
			/*
			 * sd is only printed under DEBUG; the store into the
			 * pcb below is commented out (XXXKIB).
			 */
710 			memcpy(&sd, &a, sizeof(a));
711 #ifdef DEBUG
712 			if (ldebug(clone))
713 				printf("Segment created in clone with "
714 				    "CLONE_SETTLS: lobase: %x, hibase: %x, "
715 				    "lolimit: %x, hilimit: %x, type: %i, "
716 				    "dpl: %i, p: %i, xx: %i, long: %i, "
717 				    "def32: %i, gran: %i\n", sd.sd_lobase,
718 				    sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit,
719 				    sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx,
720 				    sd.sd_long, sd.sd_def32, sd.sd_gran);
721 #endif
722 			td2->td_pcb->pcb_gsbase = (register_t)info.base_addr;
723 /* XXXKIB		td2->td_pcb->pcb_gs32sd = sd; */
724 			td2->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL);
725 			td2->td_pcb->pcb_flags |= PCB_GS32BIT | PCB_32BIT;
726 		}
727 	}
728 
729 #ifdef DEBUG
730 	if (ldebug(clone))
731 		printf(LMSG("clone: successful rfork to %d, "
732 		    "stack %p sig = %d"), (int)p2->p_pid, args->stack,
733 		    exit_signal);
734 #endif
	/* Set P_PPWAIT before the child can run; the parent waits on it below. */
735 	if (args->flags & LINUX_CLONE_VFORK) {
736 	   	PROC_LOCK(p2);
737 	   	p2->p_flag |= P_PPWAIT;
738 	   	PROC_UNLOCK(p2);
739 	}
740 
741 	/*
742 	 * Make this runnable after we are finished with it.
743 	 */
744 	thread_lock(td2);
745 	TD_SET_CAN_RUN(td2);
746 	sched_add(td2, SRQ_BORING);
747 	thread_unlock(td2);
748 
749 	td->td_retval[0] = p2->p_pid;
750 	td->td_retval[1] = 0;
751 
752 	if (args->flags & LINUX_CLONE_VFORK) {
753 		/* wait for the children to exit, ie. emulate vfork */
754 		PROC_LOCK(p2);
755 		while (p2->p_flag & P_PPWAIT)
756 			cv_wait(&p2->p_pwait, &p2->p_mtx);
757 		PROC_UNLOCK(p2);
758 	}
759 
760 	return (0);
761 }
762 
/*
 * Stack sizing used by linux_mmap_common() for LINUX_MAP_GROWSDOWN
 * mappings: a request no larger than (STACK_SIZE - GUARD_SIZE) is
 * widened to exactly that size, with the start address lowered to match.
 */
763 #define STACK_SIZE  (2 * 1024 * 1024)
764 #define GUARD_SIZE  (4 * PAGE_SIZE)
765 
766 int
767 linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
768 {
769 
770 #ifdef DEBUG
771 	if (ldebug(mmap2))
772 		printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"),
773 		    args->addr, args->len, args->prot,
774 		    args->flags, args->fd, args->pgoff);
775 #endif
776 
777 	return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot,
778 		args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff *
779 		PAGE_SIZE));
780 }
781 
782 int
783 linux_mmap(struct thread *td, struct linux_mmap_args *args)
784 {
785 	int error;
786 	struct l_mmap_argv linux_args;
787 
788 	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
789 	if (error)
790 		return (error);
791 
792 #ifdef DEBUG
793 	if (ldebug(mmap))
794 		printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"),
795 		    linux_args.addr, linux_args.len, linux_args.prot,
796 		    linux_args.flags, linux_args.fd, linux_args.pgoff);
797 #endif
798 
799 	return (linux_mmap_common(td, linux_args.addr, linux_args.len,
800 	    linux_args.prot, linux_args.flags, linux_args.fd,
801 	    (uint32_t)linux_args.pgoff));
802 }
803 
/*
 * Common implementation behind linux_mmap() and linux_mmap2().
 *
 * Translates the Linux mapping flags and protection into a native
 * struct mmap_args, validates the descriptor for file-backed mappings,
 * applies the LINUX_MAP_GROWSDOWN stack adjustments and finally calls
 * mmap().
 *
 * Returns EINVAL unless exactly one of LINUX_MAP_SHARED and
 * LINUX_MAP_PRIVATE is set or when the descriptor is not a vnode,
 * EACCES when the file is not open for reading, the fget() error for a
 * bad descriptor, and otherwise whatever mmap() returns.
 */
804 static int
805 linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
806     l_int flags, l_int fd, l_loff_t pos)
807 {
808 	struct proc *p = td->td_proc;
809 	struct mmap_args /* {
810 		caddr_t addr;
811 		size_t len;
812 		int prot;
813 		int flags;
814 		int fd;
815 		long pad;
816 		off_t pos;
817 	} */ bsd_args;
818 	int error;
819 	struct file *fp;
820 
821 	error = 0;
822 	bsd_args.flags = 0;
823 	fp = NULL;
824 
825 	/*
826 	 * Linux mmap(2):
827 	 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
828 	 */
829 	if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
830 		return (EINVAL);
831 
832 	if (flags & LINUX_MAP_SHARED)
833 		bsd_args.flags |= MAP_SHARED;
834 	if (flags & LINUX_MAP_PRIVATE)
835 		bsd_args.flags |= MAP_PRIVATE;
836 	if (flags & LINUX_MAP_FIXED)
837 		bsd_args.flags |= MAP_FIXED;
	/* Every non-anonymous mapping is additionally marked MAP_NOSYNC. */
838 	if (flags & LINUX_MAP_ANON)
839 		bsd_args.flags |= MAP_ANON;
840 	else
841 		bsd_args.flags |= MAP_NOSYNC;
842 	if (flags & LINUX_MAP_GROWSDOWN)
843 		bsd_args.flags |= MAP_STACK;
844 
845 	/*
846 	 * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
847 	 * on Linux/i386. We do this to ensure maximum compatibility.
848 	 * Linux/ia64 does the same in i386 emulation mode.
849 	 */
850 	bsd_args.prot = prot;
851 	if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
852 		bsd_args.prot |= PROT_READ | PROT_EXEC;
853 
854 	/* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
855 	bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
856 	if (bsd_args.fd != -1) {
857 		/*
858 		 * Linux follows Solaris mmap(2) description:
859 		 * The file descriptor fildes is opened with
860 		 * read permission, regardless of the
861 		 * protection options specified.
862 		 */
863 
864 		if ((error = fget(td, bsd_args.fd, &fp)) != 0)
865 			return (error);
866 		if (fp->f_type != DTYPE_VNODE) {
867 			fdrop(fp, td);
868 			return (EINVAL);
869 		}
870 
871 		/* Linux mmap() just fails for O_WRONLY files */
872 		if (!(fp->f_flag & FREAD)) {
873 			fdrop(fp, td);
874 			return (EACCES);
875 		}
876 
		/* The file was needed only for the checks above. */
877 		fdrop(fp, td);
878 	}
879 
880 	if (flags & LINUX_MAP_GROWSDOWN) {
881 		/*
882 		 * The Linux MAP_GROWSDOWN option does not limit auto
883 		 * growth of the region.  Linux mmap with this option
884 		 * takes as addr the initial BOS, and as len, the initial
885 		 * region size.  It can then grow down from addr without
886 		 * limit.  However, Linux threads has an implicit internal
887 		 * limit to stack size of STACK_SIZE.  It's just not
888 		 * enforced explicitly in Linux.  But, here we impose
889 		 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
890 		 * region, since we can do this with our mmap.
891 		 *
892 		 * Our mmap with MAP_STACK takes addr as the maximum
893 		 * downsize limit on BOS, and as len the max size of
894 		 * the region.  It then maps the top SGROWSIZ bytes,
895 		 * and auto grows the region down, up to the limit
896 		 * in addr.
897 		 *
898 		 * If we don't use the MAP_STACK option, the effect
899 		 * of this code is to allocate a stack region of a
900 		 * fixed size of (STACK_SIZE - GUARD_SIZE).
901 		 */
902 
903 		if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) {
904 			/*
905 			 * Some Linux apps will attempt to mmap
906 			 * thread stacks near the top of their
907 			 * address space.  If their TOS is greater
908 			 * than vm_maxsaddr, vm_map_growstack()
909 			 * will confuse the thread stack with the
910 			 * process stack and deliver a SEGV if they
911 			 * attempt to grow the thread stack past their
912 			 * current stacksize rlimit.  To avoid this,
913 			 * adjust vm_maxsaddr upwards to reflect
914 			 * the current stacksize rlimit rather
915 			 * than the maximum possible stacksize.
916 			 * It would be better to adjust the
917 			 * mmap'ed region, but some apps do not check
918 			 * mmap's return value.
919 			 */
920 			PROC_LOCK(p);
921 			p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK -
922 			    lim_cur(p, RLIMIT_STACK);
923 			PROC_UNLOCK(p);
924 		}
925 
926 		/*
927 		 * This gives us our maximum stack size and a new BOS.
928 		 * If we're using VM_STACK, then mmap will just map
929 		 * the top SGROWSIZ bytes, and let the stack grow down
930 		 * to the limit at BOS.  If we're not using VM_STACK
931 		 * we map the full stack, since we don't have a way
932 		 * to autogrow it.
933 		 */
934 		if (len > STACK_SIZE - GUARD_SIZE) {
935 			bsd_args.addr = (caddr_t)PTRIN(addr);
936 			bsd_args.len = len;
937 		} else {
			/* Lower the start so the region still ends at addr + len. */
938 			bsd_args.addr = (caddr_t)PTRIN(addr) -
939 			    (STACK_SIZE - GUARD_SIZE - len);
940 			bsd_args.len = STACK_SIZE - GUARD_SIZE;
941 		}
942 	} else {
943 		bsd_args.addr = (caddr_t)PTRIN(addr);
944 		bsd_args.len  = len;
945 	}
946 	bsd_args.pos = pos;
947 
948 #ifdef DEBUG
949 	if (ldebug(mmap))
950 		printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
951 		    __func__,
952 		    (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot,
953 		    bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
954 #endif
955 	error = mmap(td, &bsd_args);
956 #ifdef DEBUG
957 	if (ldebug(mmap))
958 		printf("-> %s() return: 0x%x (0x%08x)\n",
959 			__func__, error, (u_int)td->td_retval[0]);
960 #endif
961 	return (error);
962 }
963 
964 int
965 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
966 {
967 	struct mprotect_args bsd_args;
968 
969 	bsd_args.addr = uap->addr;
970 	bsd_args.len = uap->len;
971 	bsd_args.prot = uap->prot;
972 	if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
973 		bsd_args.prot |= PROT_READ | PROT_EXEC;
974 	return (mprotect(td, &bsd_args));
975 }
976 
977 int
978 linux_iopl(struct thread *td, struct linux_iopl_args *args)
979 {
980 	int error;
981 
982 	if (args->level < 0 || args->level > 3)
983 		return (EINVAL);
984 	if ((error = priv_check(td, PRIV_IO)) != 0)
985 		return (error);
986 	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
987 		return (error);
988 	td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) |
989 	    (args->level * (PSL_IOPL / 3));
990 
991 	return (0);
992 }
993 
994 int
995 linux_pipe(struct thread *td, struct linux_pipe_args *args)
996 {
997 	int error;
998 	int fildes[2];
999 
1000 #ifdef DEBUG
1001 	if (ldebug(pipe))
1002 		printf(ARGS(pipe, "*"));
1003 #endif
1004 
1005 	error = kern_pipe(td, fildes);
1006 	if (error)
1007 		return (error);
1008 
1009 	/* XXX: Close descriptors on error. */
1010 	return (copyout(fildes, args->pipefds, sizeof fildes));
1011 }
1012 
1013 int
1014 linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
1015 {
1016 	l_osigaction_t osa;
1017 	l_sigaction_t act, oact;
1018 	int error;
1019 
1020 #ifdef DEBUG
1021 	if (ldebug(sigaction))
1022 		printf(ARGS(sigaction, "%d, %p, %p"),
1023 		    args->sig, (void *)args->nsa, (void *)args->osa);
1024 #endif
1025 
1026 	if (args->nsa != NULL) {
1027 		error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
1028 		if (error)
1029 			return (error);
1030 		act.lsa_handler = osa.lsa_handler;
1031 		act.lsa_flags = osa.lsa_flags;
1032 		act.lsa_restorer = osa.lsa_restorer;
1033 		LINUX_SIGEMPTYSET(act.lsa_mask);
1034 		act.lsa_mask.__bits[0] = osa.lsa_mask;
1035 	}
1036 
1037 	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
1038 	    args->osa ? &oact : NULL);
1039 
1040 	if (args->osa != NULL && !error) {
1041 		osa.lsa_handler = oact.lsa_handler;
1042 		osa.lsa_flags = oact.lsa_flags;
1043 		osa.lsa_restorer = oact.lsa_restorer;
1044 		osa.lsa_mask = oact.lsa_mask.__bits[0];
1045 		error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
1046 	}
1047 
1048 	return (error);
1049 }
1050 
1051 /*
1052  * Linux has two extra args, restart and oldmask.  We don't use these,
1053  * but it seems that "restart" is actually a context pointer that
1054  * enables the signal to happen with a different register set.
1055  */
1056 int
1057 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
1058 {
1059 	sigset_t sigmask;
1060 	l_sigset_t mask;
1061 
1062 #ifdef DEBUG
1063 	if (ldebug(sigsuspend))
1064 		printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask);
1065 #endif
1066 
1067 	LINUX_SIGEMPTYSET(mask);
1068 	mask.__bits[0] = args->mask;
1069 	linux_to_bsd_sigset(&mask, &sigmask);
1070 	return (kern_sigsuspend(td, sigmask));
1071 }
1072 
1073 int
1074 linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap)
1075 {
1076 	l_sigset_t lmask;
1077 	sigset_t sigmask;
1078 	int error;
1079 
1080 #ifdef DEBUG
1081 	if (ldebug(rt_sigsuspend))
1082 		printf(ARGS(rt_sigsuspend, "%p, %d"),
1083 		    (void *)uap->newset, uap->sigsetsize);
1084 #endif
1085 
1086 	if (uap->sigsetsize != sizeof(l_sigset_t))
1087 		return (EINVAL);
1088 
1089 	error = copyin(uap->newset, &lmask, sizeof(l_sigset_t));
1090 	if (error)
1091 		return (error);
1092 
1093 	linux_to_bsd_sigset(&lmask, &sigmask);
1094 	return (kern_sigsuspend(td, sigmask));
1095 }
1096 
1097 int
1098 linux_pause(struct thread *td, struct linux_pause_args *args)
1099 {
1100 	struct proc *p = td->td_proc;
1101 	sigset_t sigmask;
1102 
1103 #ifdef DEBUG
1104 	if (ldebug(pause))
1105 		printf(ARGS(pause, ""));
1106 #endif
1107 
1108 	PROC_LOCK(p);
1109 	sigmask = td->td_sigmask;
1110 	PROC_UNLOCK(p);
1111 	return (kern_sigsuspend(td, sigmask));
1112 }
1113 
1114 int
1115 linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap)
1116 {
1117 	stack_t ss, oss;
1118 	l_stack_t lss;
1119 	int error;
1120 
1121 #ifdef DEBUG
1122 	if (ldebug(sigaltstack))
1123 		printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss);
1124 #endif
1125 
1126 	if (uap->uss != NULL) {
1127 		error = copyin(uap->uss, &lss, sizeof(l_stack_t));
1128 		if (error)
1129 			return (error);
1130 
1131 		ss.ss_sp = PTRIN(lss.ss_sp);
1132 		ss.ss_size = lss.ss_size;
1133 		ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags);
1134 	}
1135 	error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL,
1136 	    (uap->uoss != NULL) ? &oss : NULL);
1137 	if (!error && uap->uoss != NULL) {
1138 		lss.ss_sp = PTROUT(oss.ss_sp);
1139 		lss.ss_size = oss.ss_size;
1140 		lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags);
1141 		error = copyout(&lss, uap->uoss, sizeof(l_stack_t));
1142 	}
1143 
1144 	return (error);
1145 }
1146 
1147 int
1148 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
1149 {
1150 	struct ftruncate_args sa;
1151 
1152 #ifdef DEBUG
1153 	if (ldebug(ftruncate64))
1154 		printf(ARGS(ftruncate64, "%u, %jd"), args->fd,
1155 		    (intmax_t)args->length);
1156 #endif
1157 
1158 	sa.fd = args->fd;
1159 	sa.length = args->length;
1160 	return ftruncate(td, &sa);
1161 }
1162 
1163 int
1164 linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap)
1165 {
1166 	struct timeval atv;
1167 	l_timeval atv32;
1168 	struct timezone rtz;
1169 	int error = 0;
1170 
1171 	if (uap->tp) {
1172 		microtime(&atv);
1173 		atv32.tv_sec = atv.tv_sec;
1174 		atv32.tv_usec = atv.tv_usec;
1175 		error = copyout(&atv32, uap->tp, sizeof(atv32));
1176 	}
1177 	if (error == 0 && uap->tzp != NULL) {
1178 		rtz.tz_minuteswest = tz_minuteswest;
1179 		rtz.tz_dsttime = tz_dsttime;
1180 		error = copyout(&rtz, uap->tzp, sizeof(rtz));
1181 	}
1182 	return (error);
1183 }
1184 
1185 int
1186 linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap)
1187 {
1188 	l_timeval atv32;
1189 	struct timeval atv, *tvp;
1190 	struct timezone atz, *tzp;
1191 	int error;
1192 
1193 	if (uap->tp) {
1194 		error = copyin(uap->tp, &atv32, sizeof(atv32));
1195 		if (error)
1196 			return (error);
1197 		atv.tv_sec = atv32.tv_sec;
1198 		atv.tv_usec = atv32.tv_usec;
1199 		tvp = &atv;
1200 	} else
1201 		tvp = NULL;
1202 	if (uap->tzp) {
1203 		error = copyin(uap->tzp, &atz, sizeof(atz));
1204 		if (error)
1205 			return (error);
1206 		tzp = &atz;
1207 	} else
1208 		tzp = NULL;
1209 	return (kern_settimeofday(td, tvp, tzp));
1210 }
1211 
1212 int
1213 linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
1214 {
1215 	struct l_rusage s32;
1216 	struct rusage s;
1217 	int error;
1218 
1219 	error = kern_getrusage(td, uap->who, &s);
1220 	if (error != 0)
1221 		return (error);
1222 	if (uap->rusage != NULL) {
1223 		s32.ru_utime.tv_sec = s.ru_utime.tv_sec;
1224 		s32.ru_utime.tv_usec = s.ru_utime.tv_usec;
1225 		s32.ru_stime.tv_sec = s.ru_stime.tv_sec;
1226 		s32.ru_stime.tv_usec = s.ru_stime.tv_usec;
1227 		s32.ru_maxrss = s.ru_maxrss;
1228 		s32.ru_ixrss = s.ru_ixrss;
1229 		s32.ru_idrss = s.ru_idrss;
1230 		s32.ru_isrss = s.ru_isrss;
1231 		s32.ru_minflt = s.ru_minflt;
1232 		s32.ru_majflt = s.ru_majflt;
1233 		s32.ru_nswap = s.ru_nswap;
1234 		s32.ru_inblock = s.ru_inblock;
1235 		s32.ru_oublock = s.ru_oublock;
1236 		s32.ru_msgsnd = s.ru_msgsnd;
1237 		s32.ru_msgrcv = s.ru_msgrcv;
1238 		s32.ru_nsignals = s.ru_nsignals;
1239 		s32.ru_nvcsw = s.ru_nvcsw;
1240 		s32.ru_nivcsw = s.ru_nivcsw;
1241 		error = copyout(&s32, uap->rusage, sizeof(s32));
1242 	}
1243 	return (error);
1244 }
1245 
1246 int
1247 linux_sched_rr_get_interval(struct thread *td,
1248     struct linux_sched_rr_get_interval_args *uap)
1249 {
1250 	struct timespec ts;
1251 	struct l_timespec ts32;
1252 	int error;
1253 
1254 	error = kern_sched_rr_get_interval(td, uap->pid, &ts);
1255 	if (error != 0)
1256 		return (error);
1257 	ts32.tv_sec = ts.tv_sec;
1258 	ts32.tv_nsec = ts.tv_nsec;
1259 	return (copyout(&ts32, uap->interval, sizeof(ts32)));
1260 }
1261 
1262 int
1263 linux_set_thread_area(struct thread *td,
1264     struct linux_set_thread_area_args *args)
1265 {
1266 	struct l_user_desc info;
1267 	struct user_segment_descriptor sd;
1268 	int a[2];
1269 	int error;
1270 
1271 	error = copyin(args->desc, &info, sizeof(struct l_user_desc));
1272 	if (error)
1273 		return (error);
1274 
1275 #ifdef DEBUG
1276 	if (ldebug(set_thread_area))
1277 		printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, "
1278 		    "%i, %i, %i"), info.entry_number, info.base_addr,
1279 		    info.limit, info.seg_32bit, info.contents,
1280 		    info.read_exec_only, info.limit_in_pages,
1281 		    info.seg_not_present, info.useable);
1282 #endif
1283 
1284 	/*
1285 	 * Semantics of Linux version: every thread in the system has array
1286 	 * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown.
1287 	 * This syscall loads one of the selected TLS decriptors with a value
1288 	 * and also loads GDT descriptors 6, 7 and 8 with the content of
1289 	 * the per-thread descriptors.
1290 	 *
1291 	 * Semantics of FreeBSD version: I think we can ignore that Linux has
1292 	 * three per-thread descriptors and use just the first one.
1293 	 * The tls_array[] is used only in [gs]et_thread_area() syscalls and
1294 	 * for loading the GDT descriptors. We use just one GDT descriptor
1295 	 * for TLS, so we will load just one.
1296 	 *
1297 	 * XXX: This doesn't work when a user space process tries to use more
1298 	 * than one TLS segment. Comment in the Linux source says wine might
1299 	 * do this.
1300 	 */
1301 
1302 	/*
1303 	 * GLIBC reads current %gs and call set_thread_area() with it.
1304 	 * We should let GUDATA_SEL and GUGS32_SEL proceed as well because
1305 	 * we use these segments.
1306 	 */
1307 	switch (info.entry_number) {
1308 	case GUGS32_SEL:
1309 	case GUDATA_SEL:
1310 	case 6:
1311 	case -1:
1312 		info.entry_number = GUGS32_SEL;
1313 		break;
1314 	default:
1315 		return (EINVAL);
1316 	}
1317 
1318 	/*
1319 	 * We have to copy out the GDT entry we use.
1320 	 *
1321 	 * XXX: What if a user space program does not check the return value
1322 	 * and tries to use 6, 7 or 8?
1323 	 */
1324 	error = copyout(&info, args->desc, sizeof(struct l_user_desc));
1325 	if (error)
1326 		return (error);
1327 
1328 	if (LINUX_LDT_empty(&info)) {
1329 		a[0] = 0;
1330 		a[1] = 0;
1331 	} else {
1332 		a[0] = LINUX_LDT_entry_a(&info);
1333 		a[1] = LINUX_LDT_entry_b(&info);
1334 	}
1335 
1336 	memcpy(&sd, &a, sizeof(a));
1337 #ifdef DEBUG
1338 	if (ldebug(set_thread_area))
1339 		printf("Segment created in set_thread_area: "
1340 		    "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, "
1341 		    "type: %i, dpl: %i, p: %i, xx: %i, long: %i, "
1342 		    "def32: %i, gran: %i\n",
1343 		    sd.sd_lobase,
1344 		    sd.sd_hibase,
1345 		    sd.sd_lolimit,
1346 		    sd.sd_hilimit,
1347 		    sd.sd_type,
1348 		    sd.sd_dpl,
1349 		    sd.sd_p,
1350 		    sd.sd_xx,
1351 		    sd.sd_long,
1352 		    sd.sd_def32,
1353 		    sd.sd_gran);
1354 #endif
1355 
1356 	td->td_pcb->pcb_gsbase = (register_t)info.base_addr;
1357 	td->td_pcb->pcb_flags |= PCB_32BIT | PCB_GS32BIT;
1358 	update_gdt_gsbase(td, info.base_addr);
1359 
1360 	return (0);
1361 }
1362