xref: /freebsd/sys/amd64/linux32/linux32_machdep.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2004 Tim J. Robbins
5  * Copyright (c) 2002 Doug Rabson
6  * Copyright (c) 2000 Marcel Moolenaar
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/lock.h>
35 #include <sys/mutex.h>
36 #include <sys/priv.h>
37 #include <sys/proc.h>
38 #include <sys/reg.h>
39 #include <sys/syscallsubr.h>
40 
41 #include <machine/md_var.h>
42 #include <machine/specialreg.h>
43 #include <x86/ifunc.h>
44 
45 #include <compat/freebsd32/freebsd32_util.h>
46 #include <amd64/linux32/linux.h>
47 #include <amd64/linux32/linux32_proto.h>
48 #include <compat/linux/linux_emul.h>
49 #include <compat/linux/linux_fork.h>
50 #include <compat/linux/linux_ipc.h>
51 #include <compat/linux/linux_mmap.h>
52 #include <compat/linux/linux_signal.h>
53 #include <compat/linux/linux_util.h>
54 
55 static void	bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru);
56 
57 struct l_old_select_argv {
58 	l_int		nfds;
59 	l_uintptr_t	readfds;
60 	l_uintptr_t	writefds;
61 	l_uintptr_t	exceptfds;
62 	l_uintptr_t	timeout;
63 } __packed;
64 
65 static void
66 bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru)
67 {
68 
69 	lru->ru_utime.tv_sec = ru->ru_utime.tv_sec;
70 	lru->ru_utime.tv_usec = ru->ru_utime.tv_usec;
71 	lru->ru_stime.tv_sec = ru->ru_stime.tv_sec;
72 	lru->ru_stime.tv_usec = ru->ru_stime.tv_usec;
73 	lru->ru_maxrss = ru->ru_maxrss;
74 	lru->ru_ixrss = ru->ru_ixrss;
75 	lru->ru_idrss = ru->ru_idrss;
76 	lru->ru_isrss = ru->ru_isrss;
77 	lru->ru_minflt = ru->ru_minflt;
78 	lru->ru_majflt = ru->ru_majflt;
79 	lru->ru_nswap = ru->ru_nswap;
80 	lru->ru_inblock = ru->ru_inblock;
81 	lru->ru_oublock = ru->ru_oublock;
82 	lru->ru_msgsnd = ru->ru_msgsnd;
83 	lru->ru_msgrcv = ru->ru_msgrcv;
84 	lru->ru_nsignals = ru->ru_nsignals;
85 	lru->ru_nvcsw = ru->ru_nvcsw;
86 	lru->ru_nivcsw = ru->ru_nivcsw;
87 }
88 
89 int
90 linux_copyout_rusage(struct rusage *ru, void *uaddr)
91 {
92 	struct l_rusage lru;
93 
94 	bsd_to_linux_rusage(ru, &lru);
95 
96 	return (copyout(&lru, uaddr, sizeof(struct l_rusage)));
97 }
98 
99 int
100 linux_readv(struct thread *td, struct linux_readv_args *uap)
101 {
102 	struct uio *auio;
103 	int error;
104 
105 	error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
106 	if (error)
107 		return (error);
108 	error = kern_readv(td, uap->fd, auio);
109 	freeuio(auio);
110 	return (error);
111 }
112 
113 struct l_ipc_kludge {
114 	l_uintptr_t msgp;
115 	l_long msgtyp;
116 } __packed;
117 
118 int
119 linux_ipc(struct thread *td, struct linux_ipc_args *args)
120 {
121 
122 	switch (args->what & 0xFFFF) {
123 	case LINUX_SEMOP: {
124 
125 		return (kern_semop(td, args->arg1, PTRIN(args->ptr),
126 		    args->arg2, NULL));
127 	}
128 	case LINUX_SEMGET: {
129 		struct linux_semget_args a;
130 
131 		a.key = args->arg1;
132 		a.nsems = args->arg2;
133 		a.semflg = args->arg3;
134 		return (linux_semget(td, &a));
135 	}
136 	case LINUX_SEMCTL: {
137 		struct linux_semctl_args a;
138 		int error;
139 
140 		a.semid = args->arg1;
141 		a.semnum = args->arg2;
142 		a.cmd = args->arg3;
143 		error = copyin(PTRIN(args->ptr), &a.arg, sizeof(a.arg));
144 		if (error)
145 			return (error);
146 		return (linux_semctl(td, &a));
147 	}
148 	case LINUX_SEMTIMEDOP: {
149 		struct linux_semtimedop_args a;
150 
151 		a.semid = args->arg1;
152 		a.tsops = PTRIN(args->ptr);
153 		a.nsops = args->arg2;
154 		a.timeout = PTRIN(args->arg5);
155 		return (linux_semtimedop(td, &a));
156 	}
157 	case LINUX_MSGSND: {
158 		struct linux_msgsnd_args a;
159 
160 		a.msqid = args->arg1;
161 		a.msgp = PTRIN(args->ptr);
162 		a.msgsz = args->arg2;
163 		a.msgflg = args->arg3;
164 		return (linux_msgsnd(td, &a));
165 	}
166 	case LINUX_MSGRCV: {
167 		struct linux_msgrcv_args a;
168 
169 		a.msqid = args->arg1;
170 		a.msgsz = args->arg2;
171 		a.msgflg = args->arg3;
172 		if ((args->what >> 16) == 0) {
173 			struct l_ipc_kludge tmp;
174 			int error;
175 
176 			if (args->ptr == 0)
177 				return (EINVAL);
178 			error = copyin(PTRIN(args->ptr), &tmp, sizeof(tmp));
179 			if (error)
180 				return (error);
181 			a.msgp = PTRIN(tmp.msgp);
182 			a.msgtyp = tmp.msgtyp;
183 		} else {
184 			a.msgp = PTRIN(args->ptr);
185 			a.msgtyp = args->arg5;
186 		}
187 		return (linux_msgrcv(td, &a));
188 	}
189 	case LINUX_MSGGET: {
190 		struct linux_msgget_args a;
191 
192 		a.key = args->arg1;
193 		a.msgflg = args->arg2;
194 		return (linux_msgget(td, &a));
195 	}
196 	case LINUX_MSGCTL: {
197 		struct linux_msgctl_args a;
198 
199 		a.msqid = args->arg1;
200 		a.cmd = args->arg2;
201 		a.buf = PTRIN(args->ptr);
202 		return (linux_msgctl(td, &a));
203 	}
204 	case LINUX_SHMAT: {
205 		struct linux_shmat_args a;
206 		l_uintptr_t addr;
207 		int error;
208 
209 		a.shmid = args->arg1;
210 		a.shmaddr = PTRIN(args->ptr);
211 		a.shmflg = args->arg2;
212 		error = linux_shmat(td, &a);
213 		if (error != 0)
214 			return (error);
215 		addr = td->td_retval[0];
216 		error = copyout(&addr, PTRIN(args->arg3), sizeof(addr));
217 		td->td_retval[0] = 0;
218 		return (error);
219 	}
220 	case LINUX_SHMDT: {
221 		struct linux_shmdt_args a;
222 
223 		a.shmaddr = PTRIN(args->ptr);
224 		return (linux_shmdt(td, &a));
225 	}
226 	case LINUX_SHMGET: {
227 		struct linux_shmget_args a;
228 
229 		a.key = args->arg1;
230 		a.size = args->arg2;
231 		a.shmflg = args->arg3;
232 		return (linux_shmget(td, &a));
233 	}
234 	case LINUX_SHMCTL: {
235 		struct linux_shmctl_args a;
236 
237 		a.shmid = args->arg1;
238 		a.cmd = args->arg2;
239 		a.buf = PTRIN(args->ptr);
240 		return (linux_shmctl(td, &a));
241 	}
242 	default:
243 		break;
244 	}
245 
246 	return (EINVAL);
247 }
248 
249 int
250 linux_old_select(struct thread *td, struct linux_old_select_args *args)
251 {
252 	struct l_old_select_argv linux_args;
253 	struct linux_select_args newsel;
254 	int error;
255 
256 	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
257 	if (error)
258 		return (error);
259 
260 	newsel.nfds = linux_args.nfds;
261 	newsel.readfds = PTRIN(linux_args.readfds);
262 	newsel.writefds = PTRIN(linux_args.writefds);
263 	newsel.exceptfds = PTRIN(linux_args.exceptfds);
264 	newsel.timeout = PTRIN(linux_args.timeout);
265 	return (linux_select(td, &newsel));
266 }
267 
268 int
269 linux_set_cloned_tls(struct thread *td, void *desc)
270 {
271 	struct l_user_desc info;
272 	struct pcb *pcb;
273 	int error;
274 
275 	error = copyin(desc, &info, sizeof(struct l_user_desc));
276 	if (error) {
277 		linux_msg(td, "set_cloned_tls copyin info failed!");
278 	} else {
279 		/* We might copy out the entry_number as GUGS32_SEL. */
280 		info.entry_number = GUGS32_SEL;
281 		error = copyout(&info, desc, sizeof(struct l_user_desc));
282 		if (error)
283 			linux_msg(td, "set_cloned_tls copyout info failed!");
284 
285 		pcb = td->td_pcb;
286 		update_pcb_bases(pcb);
287 		pcb->pcb_gsbase = (register_t)info.base_addr;
288 		td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL);
289 	}
290 
291 	return (error);
292 }
293 
294 int
295 linux_set_upcall(struct thread *td, register_t stack)
296 {
297 
298 	if (stack)
299 		td->td_frame->tf_rsp = stack;
300 
301 	/*
302 	 * The newly created Linux thread returns
303 	 * to the user space by the same path that a parent do.
304 	 */
305 	td->td_frame->tf_rax = 0;
306 	return (0);
307 }
308 
309 int
310 linux_mmap(struct thread *td, struct linux_mmap_args *args)
311 {
312 	int error;
313 	struct l_mmap_argv linux_args;
314 
315 	error = copyin(args->ptr, &linux_args, sizeof(linux_args));
316 	if (error)
317 		return (error);
318 
319 	return (linux_mmap_common(td, linux_args.addr, linux_args.len,
320 	    linux_args.prot, linux_args.flags, linux_args.fd,
321 	    (uint32_t)linux_args.pgoff));
322 }
323 
324 int
325 linux_iopl(struct thread *td, struct linux_iopl_args *args)
326 {
327 	int error;
328 
329 	if (args->level < 0 || args->level > 3)
330 		return (EINVAL);
331 	if ((error = priv_check(td, PRIV_IO)) != 0)
332 		return (error);
333 	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
334 		return (error);
335 	td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) |
336 	    (args->level * (PSL_IOPL / 3));
337 
338 	return (0);
339 }
340 
341 int
342 linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
343 {
344 	l_osigaction_t osa;
345 	l_sigaction_t act, oact;
346 	int error;
347 
348 	if (args->nsa != NULL) {
349 		error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
350 		if (error)
351 			return (error);
352 		act.lsa_handler = osa.lsa_handler;
353 		act.lsa_flags = osa.lsa_flags;
354 		act.lsa_restorer = osa.lsa_restorer;
355 		LINUX_SIGEMPTYSET(act.lsa_mask);
356 		act.lsa_mask.__mask = osa.lsa_mask;
357 	}
358 
359 	error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
360 	    args->osa ? &oact : NULL);
361 
362 	if (args->osa != NULL && !error) {
363 		osa.lsa_handler = oact.lsa_handler;
364 		osa.lsa_flags = oact.lsa_flags;
365 		osa.lsa_restorer = oact.lsa_restorer;
366 		osa.lsa_mask = oact.lsa_mask.__mask;
367 		error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
368 	}
369 
370 	return (error);
371 }
372 
373 /*
374  * Linux has two extra args, restart and oldmask.  We don't use these,
375  * but it seems that "restart" is actually a context pointer that
376  * enables the signal to happen with a different register set.
377  */
378 int
379 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
380 {
381 	sigset_t sigmask;
382 	l_sigset_t mask;
383 
384 	LINUX_SIGEMPTYSET(mask);
385 	mask.__mask = args->mask;
386 	linux_to_bsd_sigset(&mask, &sigmask);
387 	return (kern_sigsuspend(td, sigmask));
388 }
389 
390 int
391 linux_pause(struct thread *td, struct linux_pause_args *args)
392 {
393 	struct proc *p = td->td_proc;
394 	sigset_t sigmask;
395 
396 	PROC_LOCK(p);
397 	sigmask = td->td_sigmask;
398 	PROC_UNLOCK(p);
399 	return (kern_sigsuspend(td, sigmask));
400 }
401 
402 int
403 linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap)
404 {
405 	struct timeval atv;
406 	l_timeval atv32;
407 	struct timezone rtz;
408 	int error = 0;
409 
410 	if (uap->tp) {
411 		microtime(&atv);
412 		atv32.tv_sec = atv.tv_sec;
413 		atv32.tv_usec = atv.tv_usec;
414 		error = copyout(&atv32, uap->tp, sizeof(atv32));
415 	}
416 	if (error == 0 && uap->tzp != NULL) {
417 		rtz.tz_minuteswest = 0;
418 		rtz.tz_dsttime = 0;
419 		error = copyout(&rtz, uap->tzp, sizeof(rtz));
420 	}
421 	return (error);
422 }
423 
424 int
425 linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap)
426 {
427 	l_timeval atv32;
428 	struct timeval atv, *tvp;
429 	struct timezone atz, *tzp;
430 	int error;
431 
432 	if (uap->tp) {
433 		error = copyin(uap->tp, &atv32, sizeof(atv32));
434 		if (error)
435 			return (error);
436 		atv.tv_sec = atv32.tv_sec;
437 		atv.tv_usec = atv32.tv_usec;
438 		tvp = &atv;
439 	} else
440 		tvp = NULL;
441 	if (uap->tzp) {
442 		error = copyin(uap->tzp, &atz, sizeof(atz));
443 		if (error)
444 			return (error);
445 		tzp = &atz;
446 	} else
447 		tzp = NULL;
448 	return (kern_settimeofday(td, tvp, tzp));
449 }
450 
451 int
452 linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
453 {
454 	struct rusage s;
455 	int error;
456 
457 	error = kern_getrusage(td, uap->who, &s);
458 	if (error != 0)
459 		return (error);
460 	if (uap->rusage != NULL)
461 		error = linux_copyout_rusage(&s, uap->rusage);
462 	return (error);
463 }
464 
465 int
466 linux_set_thread_area(struct thread *td,
467     struct linux_set_thread_area_args *args)
468 {
469 	struct l_user_desc info;
470 	struct pcb *pcb;
471 	int error;
472 
473 	error = copyin(args->desc, &info, sizeof(struct l_user_desc));
474 	if (error)
475 		return (error);
476 
477 	/*
478 	 * Semantics of Linux version: every thread in the system has array
479 	 * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown.
480 	 * This syscall loads one of the selected TLS descriptors with a value
481 	 * and also loads GDT descriptors 6, 7 and 8 with the content of
482 	 * the per-thread descriptors.
483 	 *
484 	 * Semantics of FreeBSD version: I think we can ignore that Linux has
485 	 * three per-thread descriptors and use just the first one.
486 	 * The tls_array[] is used only in [gs]et_thread_area() syscalls and
487 	 * for loading the GDT descriptors. We use just one GDT descriptor
488 	 * for TLS, so we will load just one.
489 	 *
490 	 * XXX: This doesn't work when a user space process tries to use more
491 	 * than one TLS segment. Comment in the Linux source says wine might
492 	 * do this.
493 	 */
494 
495 	/*
496 	 * GLIBC reads current %gs and call set_thread_area() with it.
497 	 * We should let GUDATA_SEL and GUGS32_SEL proceed as well because
498 	 * we use these segments.
499 	 */
500 	switch (info.entry_number) {
501 	case GUGS32_SEL:
502 	case GUDATA_SEL:
503 	case 6:
504 	case -1:
505 		info.entry_number = GUGS32_SEL;
506 		break;
507 	default:
508 		return (EINVAL);
509 	}
510 
511 	/*
512 	 * We have to copy out the GDT entry we use.
513 	 *
514 	 * XXX: What if a user space program does not check the return value
515 	 * and tries to use 6, 7 or 8?
516 	 */
517 	error = copyout(&info, args->desc, sizeof(struct l_user_desc));
518 	if (error)
519 		return (error);
520 
521 	pcb = td->td_pcb;
522 	update_pcb_bases(pcb);
523 	pcb->pcb_gsbase = (register_t)info.base_addr;
524 	update_gdt_gsbase(td, info.base_addr);
525 
526 	return (0);
527 }
528 
529 void
530 bsd_to_linux_regset32(const struct reg32 *b_reg,
531     struct linux_pt_regset32 *l_regset)
532 {
533 
534 	l_regset->ebx = b_reg->r_ebx;
535 	l_regset->ecx = b_reg->r_ecx;
536 	l_regset->edx = b_reg->r_edx;
537 	l_regset->esi = b_reg->r_esi;
538 	l_regset->edi = b_reg->r_edi;
539 	l_regset->ebp = b_reg->r_ebp;
540 	l_regset->eax = b_reg->r_eax;
541 	l_regset->ds = b_reg->r_ds;
542 	l_regset->es = b_reg->r_es;
543 	l_regset->fs = b_reg->r_fs;
544 	l_regset->gs = b_reg->r_gs;
545 	l_regset->orig_eax = b_reg->r_eax;
546 	l_regset->eip = b_reg->r_eip;
547 	l_regset->cs = b_reg->r_cs;
548 	l_regset->eflags = b_reg->r_eflags;
549 	l_regset->esp = b_reg->r_esp;
550 	l_regset->ss = b_reg->r_ss;
551 }
552 
553 int futex_xchgl_nosmap(int oparg, uint32_t *uaddr, int *oldval);
554 int futex_xchgl_smap(int oparg, uint32_t *uaddr, int *oldval);
555 DEFINE_IFUNC(, int, futex_xchgl, (int, uint32_t *, int *))
556 {
557 
558 	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
559 	    futex_xchgl_smap : futex_xchgl_nosmap);
560 }
561 
562 int futex_addl_nosmap(int oparg, uint32_t *uaddr, int *oldval);
563 int futex_addl_smap(int oparg, uint32_t *uaddr, int *oldval);
564 DEFINE_IFUNC(, int, futex_addl, (int, uint32_t *, int *))
565 {
566 
567 	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
568 	    futex_addl_smap : futex_addl_nosmap);
569 }
570 
571 int futex_orl_nosmap(int oparg, uint32_t *uaddr, int *oldval);
572 int futex_orl_smap(int oparg, uint32_t *uaddr, int *oldval);
573 DEFINE_IFUNC(, int, futex_orl, (int, uint32_t *, int *))
574 {
575 
576 	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
577 	    futex_orl_smap : futex_orl_nosmap);
578 }
579 
580 int futex_andl_nosmap(int oparg, uint32_t *uaddr, int *oldval);
581 int futex_andl_smap(int oparg, uint32_t *uaddr, int *oldval);
582 DEFINE_IFUNC(, int, futex_andl, (int, uint32_t *, int *))
583 {
584 
585 	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
586 	    futex_andl_smap : futex_andl_nosmap);
587 }
588 
589 int futex_xorl_nosmap(int oparg, uint32_t *uaddr, int *oldval);
590 int futex_xorl_smap(int oparg, uint32_t *uaddr, int *oldval);
591 DEFINE_IFUNC(, int, futex_xorl, (int, uint32_t *, int *))
592 {
593 
594 	return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
595 	    futex_xorl_smap : futex_xorl_nosmap);
596 }
597 
598 int
599 linux_ptrace_peekuser(struct thread *td, pid_t pid, void *addr, void *data)
600 {
601 
602 	LINUX_RATELIMIT_MSG_OPT1("PTRACE_PEEKUSER offset %ld not implemented; "
603 	    "returning EINVAL", (uintptr_t)addr);
604 	return (EINVAL);
605 }
606 
607 int
608 linux_ptrace_pokeuser(struct thread *td, pid_t pid, void *addr, void *data)
609 {
610 
611 	LINUX_RATELIMIT_MSG_OPT1("PTRACE_POKEUSER offset %ld "
612 	    "not implemented; returning EINVAL", (uintptr_t)addr);
613 	return (EINVAL);
614 }
615