1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2004 Tim J. Robbins
5 * Copyright (c) 2002 Doug Rabson
6 * Copyright (c) 2000 Marcel Moolenaar
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <sys/param.h>
34 #include <sys/lock.h>
35 #include <sys/mutex.h>
36 #include <sys/priv.h>
37 #include <sys/proc.h>
38 #include <sys/reg.h>
39 #include <sys/syscallsubr.h>
40
41 #include <machine/md_var.h>
42 #include <machine/specialreg.h>
43 #include <x86/ifunc.h>
44
45 #include <compat/freebsd32/freebsd32_util.h>
46 #include <amd64/linux32/linux.h>
47 #include <amd64/linux32/linux32_proto.h>
48 #include <compat/linux/linux_emul.h>
49 #include <compat/linux/linux_fork.h>
50 #include <compat/linux/linux_ipc.h>
51 #include <compat/linux/linux_mmap.h>
52 #include <compat/linux/linux_signal.h>
53 #include <compat/linux/linux_util.h>
54
55 static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru);
56
57 struct l_old_select_argv {
58 l_int nfds;
59 l_uintptr_t readfds;
60 l_uintptr_t writefds;
61 l_uintptr_t exceptfds;
62 l_uintptr_t timeout;
63 } __packed;
64
65 static void
bsd_to_linux_rusage(struct rusage * ru,struct l_rusage * lru)66 bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru)
67 {
68
69 lru->ru_utime.tv_sec = ru->ru_utime.tv_sec;
70 lru->ru_utime.tv_usec = ru->ru_utime.tv_usec;
71 lru->ru_stime.tv_sec = ru->ru_stime.tv_sec;
72 lru->ru_stime.tv_usec = ru->ru_stime.tv_usec;
73 lru->ru_maxrss = ru->ru_maxrss;
74 lru->ru_ixrss = ru->ru_ixrss;
75 lru->ru_idrss = ru->ru_idrss;
76 lru->ru_isrss = ru->ru_isrss;
77 lru->ru_minflt = ru->ru_minflt;
78 lru->ru_majflt = ru->ru_majflt;
79 lru->ru_nswap = ru->ru_nswap;
80 lru->ru_inblock = ru->ru_inblock;
81 lru->ru_oublock = ru->ru_oublock;
82 lru->ru_msgsnd = ru->ru_msgsnd;
83 lru->ru_msgrcv = ru->ru_msgrcv;
84 lru->ru_nsignals = ru->ru_nsignals;
85 lru->ru_nvcsw = ru->ru_nvcsw;
86 lru->ru_nivcsw = ru->ru_nivcsw;
87 }
88
89 int
linux_copyout_rusage(struct rusage * ru,void * uaddr)90 linux_copyout_rusage(struct rusage *ru, void *uaddr)
91 {
92 struct l_rusage lru;
93
94 bsd_to_linux_rusage(ru, &lru);
95
96 return (copyout(&lru, uaddr, sizeof(struct l_rusage)));
97 }
98
99 int
linux_readv(struct thread * td,struct linux_readv_args * uap)100 linux_readv(struct thread *td, struct linux_readv_args *uap)
101 {
102 struct uio *auio;
103 int error;
104
105 error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);
106 if (error)
107 return (error);
108 error = kern_readv(td, uap->fd, auio);
109 freeuio(auio);
110 return (error);
111 }
112
113 struct l_ipc_kludge {
114 l_uintptr_t msgp;
115 l_long msgtyp;
116 } __packed;
117
118 int
linux_ipc(struct thread * td,struct linux_ipc_args * args)119 linux_ipc(struct thread *td, struct linux_ipc_args *args)
120 {
121
122 switch (args->what & 0xFFFF) {
123 case LINUX_SEMOP: {
124
125 return (kern_semop(td, args->arg1, PTRIN(args->ptr),
126 args->arg2, NULL));
127 }
128 case LINUX_SEMGET: {
129 struct linux_semget_args a;
130
131 a.key = args->arg1;
132 a.nsems = args->arg2;
133 a.semflg = args->arg3;
134 return (linux_semget(td, &a));
135 }
136 case LINUX_SEMCTL: {
137 struct linux_semctl_args a;
138 int error;
139
140 a.semid = args->arg1;
141 a.semnum = args->arg2;
142 a.cmd = args->arg3;
143 error = copyin(PTRIN(args->ptr), &a.arg, sizeof(a.arg));
144 if (error)
145 return (error);
146 return (linux_semctl(td, &a));
147 }
148 case LINUX_SEMTIMEDOP: {
149 struct linux_semtimedop_args a;
150
151 a.semid = args->arg1;
152 a.tsops = PTRIN(args->ptr);
153 a.nsops = args->arg2;
154 a.timeout = PTRIN(args->arg5);
155 return (linux_semtimedop(td, &a));
156 }
157 case LINUX_MSGSND: {
158 struct linux_msgsnd_args a;
159
160 a.msqid = args->arg1;
161 a.msgp = PTRIN(args->ptr);
162 a.msgsz = args->arg2;
163 a.msgflg = args->arg3;
164 return (linux_msgsnd(td, &a));
165 }
166 case LINUX_MSGRCV: {
167 struct linux_msgrcv_args a;
168
169 a.msqid = args->arg1;
170 a.msgsz = args->arg2;
171 a.msgflg = args->arg3;
172 if ((args->what >> 16) == 0) {
173 struct l_ipc_kludge tmp;
174 int error;
175
176 if (args->ptr == 0)
177 return (EINVAL);
178 error = copyin(PTRIN(args->ptr), &tmp, sizeof(tmp));
179 if (error)
180 return (error);
181 a.msgp = PTRIN(tmp.msgp);
182 a.msgtyp = tmp.msgtyp;
183 } else {
184 a.msgp = PTRIN(args->ptr);
185 a.msgtyp = args->arg5;
186 }
187 return (linux_msgrcv(td, &a));
188 }
189 case LINUX_MSGGET: {
190 struct linux_msgget_args a;
191
192 a.key = args->arg1;
193 a.msgflg = args->arg2;
194 return (linux_msgget(td, &a));
195 }
196 case LINUX_MSGCTL: {
197 struct linux_msgctl_args a;
198
199 a.msqid = args->arg1;
200 a.cmd = args->arg2;
201 a.buf = PTRIN(args->ptr);
202 return (linux_msgctl(td, &a));
203 }
204 case LINUX_SHMAT: {
205 struct linux_shmat_args a;
206 l_uintptr_t addr;
207 int error;
208
209 a.shmid = args->arg1;
210 a.shmaddr = PTRIN(args->ptr);
211 a.shmflg = args->arg2;
212 error = linux_shmat(td, &a);
213 if (error != 0)
214 return (error);
215 addr = td->td_retval[0];
216 error = copyout(&addr, PTRIN(args->arg3), sizeof(addr));
217 td->td_retval[0] = 0;
218 return (error);
219 }
220 case LINUX_SHMDT: {
221 struct linux_shmdt_args a;
222
223 a.shmaddr = PTRIN(args->ptr);
224 return (linux_shmdt(td, &a));
225 }
226 case LINUX_SHMGET: {
227 struct linux_shmget_args a;
228
229 a.key = args->arg1;
230 a.size = args->arg2;
231 a.shmflg = args->arg3;
232 return (linux_shmget(td, &a));
233 }
234 case LINUX_SHMCTL: {
235 struct linux_shmctl_args a;
236
237 a.shmid = args->arg1;
238 a.cmd = args->arg2;
239 a.buf = PTRIN(args->ptr);
240 return (linux_shmctl(td, &a));
241 }
242 default:
243 break;
244 }
245
246 return (EINVAL);
247 }
248
249 int
linux_old_select(struct thread * td,struct linux_old_select_args * args)250 linux_old_select(struct thread *td, struct linux_old_select_args *args)
251 {
252 struct l_old_select_argv linux_args;
253 struct linux_select_args newsel;
254 int error;
255
256 error = copyin(args->ptr, &linux_args, sizeof(linux_args));
257 if (error)
258 return (error);
259
260 newsel.nfds = linux_args.nfds;
261 newsel.readfds = PTRIN(linux_args.readfds);
262 newsel.writefds = PTRIN(linux_args.writefds);
263 newsel.exceptfds = PTRIN(linux_args.exceptfds);
264 newsel.timeout = PTRIN(linux_args.timeout);
265 return (linux_select(td, &newsel));
266 }
267
268 int
linux_set_cloned_tls(struct thread * td,void * desc)269 linux_set_cloned_tls(struct thread *td, void *desc)
270 {
271 struct l_user_desc info;
272 struct pcb *pcb;
273 int error;
274
275 error = copyin(desc, &info, sizeof(struct l_user_desc));
276 if (error) {
277 linux_msg(td, "set_cloned_tls copyin info failed!");
278 } else {
279 /* We might copy out the entry_number as GUGS32_SEL. */
280 info.entry_number = GUGS32_SEL;
281 error = copyout(&info, desc, sizeof(struct l_user_desc));
282 if (error)
283 linux_msg(td, "set_cloned_tls copyout info failed!");
284
285 pcb = td->td_pcb;
286 update_pcb_bases(pcb);
287 pcb->pcb_gsbase = (register_t)info.base_addr;
288 td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL);
289 }
290
291 return (error);
292 }
293
294 int
linux_set_upcall(struct thread * td,register_t stack)295 linux_set_upcall(struct thread *td, register_t stack)
296 {
297
298 if (stack)
299 td->td_frame->tf_rsp = stack;
300
301 /*
302 * The newly created Linux thread returns
303 * to the user space by the same path that a parent do.
304 */
305 td->td_frame->tf_rax = 0;
306 return (0);
307 }
308
309 int
linux_mmap(struct thread * td,struct linux_mmap_args * args)310 linux_mmap(struct thread *td, struct linux_mmap_args *args)
311 {
312 int error;
313 struct l_mmap_argv linux_args;
314
315 error = copyin(args->ptr, &linux_args, sizeof(linux_args));
316 if (error)
317 return (error);
318
319 return (linux_mmap_common(td, linux_args.addr, linux_args.len,
320 linux_args.prot, linux_args.flags, linux_args.fd,
321 (uint32_t)linux_args.pgoff));
322 }
323
324 int
linux_iopl(struct thread * td,struct linux_iopl_args * args)325 linux_iopl(struct thread *td, struct linux_iopl_args *args)
326 {
327 int error;
328
329 if (args->level < 0 || args->level > 3)
330 return (EINVAL);
331 if ((error = priv_check(td, PRIV_IO)) != 0)
332 return (error);
333 if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
334 return (error);
335 td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) |
336 (args->level * (PSL_IOPL / 3));
337
338 return (0);
339 }
340
341 int
linux_sigaction(struct thread * td,struct linux_sigaction_args * args)342 linux_sigaction(struct thread *td, struct linux_sigaction_args *args)
343 {
344 l_osigaction_t osa;
345 l_sigaction_t act, oact;
346 int error;
347
348 if (args->nsa != NULL) {
349 error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));
350 if (error)
351 return (error);
352 act.lsa_handler = osa.lsa_handler;
353 act.lsa_flags = osa.lsa_flags;
354 act.lsa_restorer = osa.lsa_restorer;
355 LINUX_SIGEMPTYSET(act.lsa_mask);
356 act.lsa_mask.__mask = osa.lsa_mask;
357 }
358
359 error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,
360 args->osa ? &oact : NULL);
361
362 if (args->osa != NULL && !error) {
363 osa.lsa_handler = oact.lsa_handler;
364 osa.lsa_flags = oact.lsa_flags;
365 osa.lsa_restorer = oact.lsa_restorer;
366 osa.lsa_mask = oact.lsa_mask.__mask;
367 error = copyout(&osa, args->osa, sizeof(l_osigaction_t));
368 }
369
370 return (error);
371 }
372
373 /*
374 * Linux has two extra args, restart and oldmask. We don't use these,
375 * but it seems that "restart" is actually a context pointer that
376 * enables the signal to happen with a different register set.
377 */
378 int
linux_sigsuspend(struct thread * td,struct linux_sigsuspend_args * args)379 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)
380 {
381 sigset_t sigmask;
382 l_sigset_t mask;
383
384 LINUX_SIGEMPTYSET(mask);
385 mask.__mask = args->mask;
386 linux_to_bsd_sigset(&mask, &sigmask);
387 return (kern_sigsuspend(td, sigmask));
388 }
389
390 int
linux_pause(struct thread * td,struct linux_pause_args * args)391 linux_pause(struct thread *td, struct linux_pause_args *args)
392 {
393 struct proc *p = td->td_proc;
394 sigset_t sigmask;
395
396 PROC_LOCK(p);
397 sigmask = td->td_sigmask;
398 PROC_UNLOCK(p);
399 return (kern_sigsuspend(td, sigmask));
400 }
401
402 int
linux_gettimeofday(struct thread * td,struct linux_gettimeofday_args * uap)403 linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap)
404 {
405 struct timeval atv;
406 l_timeval atv32;
407 struct timezone rtz;
408 int error = 0;
409
410 if (uap->tp) {
411 microtime(&atv);
412 atv32.tv_sec = atv.tv_sec;
413 atv32.tv_usec = atv.tv_usec;
414 error = copyout(&atv32, uap->tp, sizeof(atv32));
415 }
416 if (error == 0 && uap->tzp != NULL) {
417 rtz.tz_minuteswest = 0;
418 rtz.tz_dsttime = 0;
419 error = copyout(&rtz, uap->tzp, sizeof(rtz));
420 }
421 return (error);
422 }
423
424 int
linux_settimeofday(struct thread * td,struct linux_settimeofday_args * uap)425 linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap)
426 {
427 l_timeval atv32;
428 struct timeval atv, *tvp;
429 struct timezone atz, *tzp;
430 int error;
431
432 if (uap->tp) {
433 error = copyin(uap->tp, &atv32, sizeof(atv32));
434 if (error)
435 return (error);
436 atv.tv_sec = atv32.tv_sec;
437 atv.tv_usec = atv32.tv_usec;
438 tvp = &atv;
439 } else
440 tvp = NULL;
441 if (uap->tzp) {
442 error = copyin(uap->tzp, &atz, sizeof(atz));
443 if (error)
444 return (error);
445 tzp = &atz;
446 } else
447 tzp = NULL;
448 return (kern_settimeofday(td, tvp, tzp));
449 }
450
451 int
linux_getrusage(struct thread * td,struct linux_getrusage_args * uap)452 linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
453 {
454 struct rusage s;
455 int error;
456
457 error = kern_getrusage(td, uap->who, &s);
458 if (error != 0)
459 return (error);
460 if (uap->rusage != NULL)
461 error = linux_copyout_rusage(&s, uap->rusage);
462 return (error);
463 }
464
465 int
linux_set_thread_area(struct thread * td,struct linux_set_thread_area_args * args)466 linux_set_thread_area(struct thread *td,
467 struct linux_set_thread_area_args *args)
468 {
469 struct l_user_desc info;
470 struct pcb *pcb;
471 int error;
472
473 error = copyin(args->desc, &info, sizeof(struct l_user_desc));
474 if (error)
475 return (error);
476
477 /*
478 * Semantics of Linux version: every thread in the system has array
479 * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown.
480 * This syscall loads one of the selected TLS descriptors with a value
481 * and also loads GDT descriptors 6, 7 and 8 with the content of
482 * the per-thread descriptors.
483 *
484 * Semantics of FreeBSD version: I think we can ignore that Linux has
485 * three per-thread descriptors and use just the first one.
486 * The tls_array[] is used only in [gs]et_thread_area() syscalls and
487 * for loading the GDT descriptors. We use just one GDT descriptor
488 * for TLS, so we will load just one.
489 *
490 * XXX: This doesn't work when a user space process tries to use more
491 * than one TLS segment. Comment in the Linux source says wine might
492 * do this.
493 */
494
495 /*
496 * GLIBC reads current %gs and call set_thread_area() with it.
497 * We should let GUDATA_SEL and GUGS32_SEL proceed as well because
498 * we use these segments.
499 */
500 switch (info.entry_number) {
501 case GUGS32_SEL:
502 case GUDATA_SEL:
503 case 6:
504 case -1:
505 info.entry_number = GUGS32_SEL;
506 break;
507 default:
508 return (EINVAL);
509 }
510
511 /*
512 * We have to copy out the GDT entry we use.
513 *
514 * XXX: What if a user space program does not check the return value
515 * and tries to use 6, 7 or 8?
516 */
517 error = copyout(&info, args->desc, sizeof(struct l_user_desc));
518 if (error)
519 return (error);
520
521 pcb = td->td_pcb;
522 update_pcb_bases(pcb);
523 pcb->pcb_gsbase = (register_t)info.base_addr;
524 update_gdt_gsbase(td, info.base_addr);
525
526 return (0);
527 }
528
529 void
bsd_to_linux_regset32(const struct reg32 * b_reg,struct linux_pt_regset32 * l_regset)530 bsd_to_linux_regset32(const struct reg32 *b_reg,
531 struct linux_pt_regset32 *l_regset)
532 {
533
534 l_regset->ebx = b_reg->r_ebx;
535 l_regset->ecx = b_reg->r_ecx;
536 l_regset->edx = b_reg->r_edx;
537 l_regset->esi = b_reg->r_esi;
538 l_regset->edi = b_reg->r_edi;
539 l_regset->ebp = b_reg->r_ebp;
540 l_regset->eax = b_reg->r_eax;
541 l_regset->ds = b_reg->r_ds;
542 l_regset->es = b_reg->r_es;
543 l_regset->fs = b_reg->r_fs;
544 l_regset->gs = b_reg->r_gs;
545 l_regset->orig_eax = b_reg->r_eax;
546 l_regset->eip = b_reg->r_eip;
547 l_regset->cs = b_reg->r_cs;
548 l_regset->eflags = b_reg->r_eflags;
549 l_regset->esp = b_reg->r_esp;
550 l_regset->ss = b_reg->r_ss;
551 }
552
553 int futex_xchgl_nosmap(int oparg, uint32_t *uaddr, int *oldval);
554 int futex_xchgl_smap(int oparg, uint32_t *uaddr, int *oldval);
555 DEFINE_IFUNC(, int, futex_xchgl, (int, uint32_t *, int *))
556 {
557
558 return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
559 futex_xchgl_smap : futex_xchgl_nosmap);
560 }
561
562 int futex_addl_nosmap(int oparg, uint32_t *uaddr, int *oldval);
563 int futex_addl_smap(int oparg, uint32_t *uaddr, int *oldval);
564 DEFINE_IFUNC(, int, futex_addl, (int, uint32_t *, int *))
565 {
566
567 return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
568 futex_addl_smap : futex_addl_nosmap);
569 }
570
571 int futex_orl_nosmap(int oparg, uint32_t *uaddr, int *oldval);
572 int futex_orl_smap(int oparg, uint32_t *uaddr, int *oldval);
573 DEFINE_IFUNC(, int, futex_orl, (int, uint32_t *, int *))
574 {
575
576 return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
577 futex_orl_smap : futex_orl_nosmap);
578 }
579
580 int futex_andl_nosmap(int oparg, uint32_t *uaddr, int *oldval);
581 int futex_andl_smap(int oparg, uint32_t *uaddr, int *oldval);
582 DEFINE_IFUNC(, int, futex_andl, (int, uint32_t *, int *))
583 {
584
585 return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
586 futex_andl_smap : futex_andl_nosmap);
587 }
588
589 int futex_xorl_nosmap(int oparg, uint32_t *uaddr, int *oldval);
590 int futex_xorl_smap(int oparg, uint32_t *uaddr, int *oldval);
591 DEFINE_IFUNC(, int, futex_xorl, (int, uint32_t *, int *))
592 {
593
594 return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?
595 futex_xorl_smap : futex_xorl_nosmap);
596 }
597
598 int
linux_ptrace_peekuser(struct thread * td,pid_t pid,void * addr,void * data)599 linux_ptrace_peekuser(struct thread *td, pid_t pid, void *addr, void *data)
600 {
601
602 LINUX_RATELIMIT_MSG_OPT1("PTRACE_PEEKUSER offset %ld not implemented; "
603 "returning EINVAL", (uintptr_t)addr);
604 return (EINVAL);
605 }
606
607 int
linux_ptrace_pokeuser(struct thread * td,pid_t pid,void * addr,void * data)608 linux_ptrace_pokeuser(struct thread *td, pid_t pid, void *addr, void *data)
609 {
610
611 LINUX_RATELIMIT_MSG_OPT1("PTRACE_POKEUSER offset %ld "
612 "not implemented; returning EINVAL", (uintptr_t)addr);
613 return (EINVAL);
614 }
615