1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2004 Tim J. Robbins 5 * Copyright (c) 2002 Doug Rabson 6 * Copyright (c) 2000 Marcel Moolenaar 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_compat.h" 35 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/imgact.h> 39 #include <sys/ktr.h> 40 #include <sys/lock.h> 41 #include <sys/mutex.h> 42 #include <sys/proc.h> 43 #include <sys/ptrace.h> 44 #include <sys/racct.h> 45 #include <sys/sched.h> 46 #include <sys/syscallsubr.h> 47 #include <sys/sx.h> 48 #include <sys/umtxvar.h> 49 #include <sys/unistd.h> 50 #include <sys/wait.h> 51 52 #include <vm/vm.h> 53 #include <vm/pmap.h> 54 #include <vm/vm_map.h> 55 56 #ifdef COMPAT_LINUX32 57 #include <machine/../linux32/linux.h> 58 #include <machine/../linux32/linux32_proto.h> 59 #else 60 #include <machine/../linux/linux.h> 61 #include <machine/../linux/linux_proto.h> 62 #endif 63 #include <compat/linux/linux.h> 64 #include <compat/linux/linux_emul.h> 65 #include <compat/linux/linux_fork.h> 66 #include <compat/linux/linux_futex.h> 67 #include <compat/linux/linux_mib.h> 68 #include <compat/linux/linux_misc.h> 69 #include <compat/linux/linux_util.h> 70 71 #ifdef LINUX_LEGACY_SYSCALLS 72 int 73 linux_fork(struct thread *td, struct linux_fork_args *args) 74 { 75 struct fork_req fr; 76 int error; 77 struct proc *p2; 78 struct thread *td2; 79 80 bzero(&fr, sizeof(fr)); 81 fr.fr_flags = RFFDG | RFPROC | RFSTOPPED; 82 fr.fr_procp = &p2; 83 if ((error = fork1(td, &fr)) != 0) 84 return (error); 85 86 td2 = FIRST_THREAD_IN_PROC(p2); 87 88 linux_proc_init(td, td2, false); 89 90 td->td_retval[0] = p2->p_pid; 91 92 /* 93 * Make this runnable after we are finished with it. 94 */ 95 thread_lock(td2); 96 TD_SET_CAN_RUN(td2); 97 sched_add(td2, SRQ_BORING); 98 99 return (0); 100 } 101 102 int 103 linux_vfork(struct thread *td, struct linux_vfork_args *args) 104 { 105 struct fork_req fr; 106 int error; 107 struct proc *p2; 108 struct thread *td2; 109 110 bzero(&fr, sizeof(fr)); 111 fr.fr_flags = RFFDG | RFPROC | RFMEM | RFPPWAIT | RFSTOPPED; 112 fr.fr_procp = &p2; 113 if ((error = fork1(td, &fr)) != 0) 114 return (error); 115 116 td2 = FIRST_THREAD_IN_PROC(p2); 117 118 linux_proc_init(td, td2, false); 119 120 td->td_retval[0] = p2->p_pid; 121 122 /* 123 * Make this runnable after we are finished with it. 124 */ 125 thread_lock(td2); 126 TD_SET_CAN_RUN(td2); 127 sched_add(td2, SRQ_BORING); 128 129 return (0); 130 } 131 #endif 132 133 static int 134 linux_clone_proc(struct thread *td, struct l_clone_args *args) 135 { 136 struct fork_req fr; 137 int error, ff, f2; 138 struct proc *p2; 139 struct thread *td2; 140 int exit_signal; 141 struct linux_emuldata *em; 142 143 f2 = 0; 144 ff = RFPROC | RFSTOPPED; 145 if (LINUX_SIG_VALID(args->exit_signal)) { 146 exit_signal = linux_to_bsd_signal(args->exit_signal); 147 } else if (args->exit_signal != 0) 148 return (EINVAL); 149 else 150 exit_signal = 0; 151 152 if (args->flags & LINUX_CLONE_VM) 153 ff |= RFMEM; 154 if (args->flags & LINUX_CLONE_SIGHAND) 155 ff |= RFSIGSHARE; 156 if ((args->flags & LINUX_CLONE_CLEAR_SIGHAND) != 0) 157 f2 |= FR2_DROPSIG_CAUGHT; 158 if (args->flags & LINUX_CLONE_FILES) { 159 if (!(args->flags & LINUX_CLONE_FS)) 160 f2 |= FR2_SHARE_PATHS; 161 } else { 162 ff |= RFFDG; 163 if (args->flags & LINUX_CLONE_FS) 164 f2 |= FR2_SHARE_PATHS; 165 } 166 167 if (args->flags & LINUX_CLONE_PARENT_SETTID) 168 if (args->parent_tid == NULL) 169 return (EINVAL); 170 171 if (args->flags & LINUX_CLONE_VFORK) 172 ff |= RFPPWAIT; 173 174 bzero(&fr, sizeof(fr)); 175 fr.fr_flags = ff; 176 fr.fr_flags2 = f2; 177 fr.fr_procp = &p2; 178 error = fork1(td, &fr); 179 if (error) 180 return (error); 181 182 td2 = FIRST_THREAD_IN_PROC(p2); 183 184 /* create the emuldata */ 185 linux_proc_init(td, td2, false); 186 187 em = em_find(td2); 188 KASSERT(em != NULL, ("clone_proc: emuldata not found.\n")); 189 190 if (args->flags & LINUX_CLONE_CHILD_SETTID) 191 em->child_set_tid = args->child_tid; 192 else 193 em->child_set_tid = NULL; 194 195 if (args->flags & LINUX_CLONE_CHILD_CLEARTID) 196 em->child_clear_tid = args->child_tid; 197 else 198 em->child_clear_tid = NULL; 199 200 if (args->flags & LINUX_CLONE_PARENT_SETTID) { 201 error = copyout(&p2->p_pid, args->parent_tid, 202 sizeof(p2->p_pid)); 203 if (error) 204 linux_msg(td, "copyout p_pid failed!"); 205 } 206 207 PROC_LOCK(p2); 208 p2->p_sigparent = exit_signal; 209 PROC_UNLOCK(p2); 210 /* 211 * In a case of stack = NULL, we are supposed to COW calling process 212 * stack. This is what normal fork() does, so we just keep tf_rsp arg 213 * intact. 214 */ 215 linux_set_upcall(td2, args->stack); 216 217 if (args->flags & LINUX_CLONE_SETTLS) 218 linux_set_cloned_tls(td2, PTRIN(args->tls)); 219 220 /* 221 * If CLONE_PARENT is set, then the parent of the new process will be 222 * the same as that of the calling process. 223 */ 224 if (args->flags & LINUX_CLONE_PARENT) { 225 sx_xlock(&proctree_lock); 226 PROC_LOCK(p2); 227 proc_reparent(p2, td->td_proc->p_pptr, true); 228 PROC_UNLOCK(p2); 229 sx_xunlock(&proctree_lock); 230 } 231 232 /* 233 * Make this runnable after we are finished with it. 234 */ 235 thread_lock(td2); 236 TD_SET_CAN_RUN(td2); 237 sched_add(td2, SRQ_BORING); 238 239 td->td_retval[0] = p2->p_pid; 240 241 return (0); 242 } 243 244 static int 245 linux_clone_thread(struct thread *td, struct l_clone_args *args) 246 { 247 struct linux_emuldata *em; 248 struct thread *newtd; 249 struct proc *p; 250 int error; 251 252 LINUX_CTR4(clone_thread, "thread(%d) flags %x ptid %p ctid %p", 253 td->td_tid, (unsigned)args->flags, 254 args->parent_tid, args->child_tid); 255 256 if ((args->flags & LINUX_CLONE_PARENT) != 0) 257 return (EINVAL); 258 if (args->flags & LINUX_CLONE_PARENT_SETTID) 259 if (args->parent_tid == NULL) 260 return (EINVAL); 261 262 /* Threads should be created with own stack */ 263 if (PTRIN(args->stack) == NULL) 264 return (EINVAL); 265 266 p = td->td_proc; 267 268 #ifdef RACCT 269 if (racct_enable) { 270 PROC_LOCK(p); 271 error = racct_add(p, RACCT_NTHR, 1); 272 PROC_UNLOCK(p); 273 if (error != 0) 274 return (EPROCLIM); 275 } 276 #endif 277 278 /* Initialize our td */ 279 error = kern_thr_alloc(p, 0, &newtd); 280 if (error) 281 goto fail; 282 283 cpu_copy_thread(newtd, td); 284 285 bzero(&newtd->td_startzero, 286 __rangeof(struct thread, td_startzero, td_endzero)); 287 bcopy(&td->td_startcopy, &newtd->td_startcopy, 288 __rangeof(struct thread, td_startcopy, td_endcopy)); 289 290 newtd->td_proc = p; 291 thread_cow_get(newtd, td); 292 293 /* create the emuldata */ 294 linux_proc_init(td, newtd, true); 295 296 em = em_find(newtd); 297 KASSERT(em != NULL, ("clone_thread: emuldata not found.\n")); 298 299 if (args->flags & LINUX_CLONE_SETTLS) 300 linux_set_cloned_tls(newtd, PTRIN(args->tls)); 301 302 if (args->flags & LINUX_CLONE_CHILD_SETTID) 303 em->child_set_tid = args->child_tid; 304 else 305 em->child_set_tid = NULL; 306 307 if (args->flags & LINUX_CLONE_CHILD_CLEARTID) 308 em->child_clear_tid = args->child_tid; 309 else 310 em->child_clear_tid = NULL; 311 312 cpu_thread_clean(newtd); 313 314 linux_set_upcall(newtd, args->stack); 315 316 PROC_LOCK(p); 317 p->p_flag |= P_HADTHREADS; 318 thread_link(newtd, p); 319 bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name)); 320 321 thread_lock(td); 322 /* let the scheduler know about these things. */ 323 sched_fork_thread(td, newtd); 324 thread_unlock(td); 325 if (P_SHOULDSTOP(p)) 326 newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; 327 328 if (p->p_ptevents & PTRACE_LWP) 329 newtd->td_dbgflags |= TDB_BORN; 330 PROC_UNLOCK(p); 331 332 tidhash_add(newtd); 333 334 LINUX_CTR2(clone_thread, "thread(%d) successful clone to %d", 335 td->td_tid, newtd->td_tid); 336 337 if (args->flags & LINUX_CLONE_PARENT_SETTID) { 338 error = copyout(&newtd->td_tid, args->parent_tid, 339 sizeof(newtd->td_tid)); 340 if (error) 341 linux_msg(td, "clone_thread: copyout td_tid failed!"); 342 } 343 344 /* 345 * Make this runnable after we are finished with it. 346 */ 347 thread_lock(newtd); 348 TD_SET_CAN_RUN(newtd); 349 sched_add(newtd, SRQ_BORING); 350 351 td->td_retval[0] = newtd->td_tid; 352 353 return (0); 354 355 fail: 356 #ifdef RACCT 357 if (racct_enable) { 358 PROC_LOCK(p); 359 racct_sub(p, RACCT_NTHR, 1); 360 PROC_UNLOCK(p); 361 } 362 #endif 363 return (error); 364 } 365 366 int 367 linux_clone(struct thread *td, struct linux_clone_args *args) 368 { 369 struct l_clone_args ca = { 370 .flags = (lower_32_bits(args->flags) & ~LINUX_CSIGNAL), 371 .child_tid = args->child_tidptr, 372 .parent_tid = args->parent_tidptr, 373 .exit_signal = (lower_32_bits(args->flags) & LINUX_CSIGNAL), 374 .stack = args->stack, 375 .tls = args->tls, 376 }; 377 378 if (args->flags & LINUX_CLONE_THREAD) 379 return (linux_clone_thread(td, &ca)); 380 else 381 return (linux_clone_proc(td, &ca)); 382 } 383 384 385 static int 386 linux_clone3_args_valid(struct l_user_clone_args *uca) 387 { 388 389 /* Verify that no unknown flags are passed along. */ 390 if ((uca->flags & ~(LINUX_CLONE_LEGACY_FLAGS | 391 LINUX_CLONE_CLEAR_SIGHAND | LINUX_CLONE_INTO_CGROUP)) != 0) 392 return (EINVAL); 393 if ((uca->flags & (LINUX_CLONE_DETACHED | LINUX_CSIGNAL)) != 0) 394 return (EINVAL); 395 396 if ((uca->flags & (LINUX_CLONE_SIGHAND | LINUX_CLONE_CLEAR_SIGHAND)) == 397 (LINUX_CLONE_SIGHAND | LINUX_CLONE_CLEAR_SIGHAND)) 398 return (EINVAL); 399 if ((uca->flags & (LINUX_CLONE_THREAD | LINUX_CLONE_PARENT)) != 0 && 400 uca->exit_signal != 0) 401 return (EINVAL); 402 403 /* We don't support set_tid, only validate input. */ 404 if (uca->set_tid_size > LINUX_MAX_PID_NS_LEVEL) 405 return (EINVAL); 406 if (uca->set_tid == 0 && uca->set_tid_size > 0) 407 return (EINVAL); 408 if (uca->set_tid != 0 && uca->set_tid_size == 0) 409 return (EINVAL); 410 411 if (uca->stack == 0 && uca->stack_size > 0) 412 return (EINVAL); 413 if (uca->stack != 0 && uca->stack_size == 0) 414 return (EINVAL); 415 416 /* Verify that higher 32bits of exit_signal are unset. */ 417 if ((uca->exit_signal & ~(uint64_t)LINUX_CSIGNAL) != 0) 418 return (EINVAL); 419 420 /* Verify that no unsupported flags are passed along. */ 421 if ((uca->flags & LINUX_CLONE_NEWTIME) != 0) { 422 LINUX_RATELIMIT_MSG("unsupported clone3 option CLONE_NEWTIME"); 423 return (ENOSYS); 424 } 425 if ((uca->flags & LINUX_CLONE_INTO_CGROUP) != 0) { 426 LINUX_RATELIMIT_MSG("unsupported clone3 option CLONE_INTO_CGROUP"); 427 return (ENOSYS); 428 } 429 if (uca->set_tid != 0 || uca->set_tid_size != 0) { 430 LINUX_RATELIMIT_MSG("unsupported clone3 set_tid"); 431 return (ENOSYS); 432 } 433 434 return (0); 435 } 436 437 int 438 linux_clone3(struct thread *td, struct linux_clone3_args *args) 439 { 440 struct l_user_clone_args *uca; 441 struct l_clone_args *ca; 442 size_t size; 443 int error; 444 445 if (args->usize > PAGE_SIZE) 446 return (E2BIG); 447 if (args->usize < LINUX_CLONE_ARGS_SIZE_VER0) 448 return (EINVAL); 449 450 /* 451 * usize can be less than size of struct clone_args, to avoid using 452 * of uninitialized data of struct clone_args, allocate at least 453 * sizeof(struct clone_args) storage and zero it. 454 */ 455 size = max(args->usize, sizeof(*uca)); 456 uca = malloc(size, M_LINUX, M_WAITOK | M_ZERO); 457 error = copyin(args->uargs, uca, args->usize); 458 if (error != 0) 459 goto out; 460 error = linux_clone3_args_valid(uca); 461 if (error != 0) 462 goto out; 463 ca = malloc(sizeof(*ca), M_LINUX, M_WAITOK | M_ZERO); 464 ca->flags = uca->flags; 465 ca->child_tid = PTRIN(uca->child_tid); 466 ca->parent_tid = PTRIN(uca->parent_tid); 467 ca->exit_signal = uca->exit_signal; 468 ca->stack = uca->stack + uca->stack_size; 469 ca->stack_size = uca->stack_size; 470 ca->tls = uca->tls; 471 472 if ((ca->flags & LINUX_CLONE_THREAD) != 0) 473 error = linux_clone_thread(td, ca); 474 else 475 error = linux_clone_proc(td, ca); 476 free(ca, M_LINUX); 477 out: 478 free(uca, M_LINUX); 479 return (error); 480 } 481 482 int 483 linux_exit(struct thread *td, struct linux_exit_args *args) 484 { 485 struct linux_emuldata *em; 486 487 em = em_find(td); 488 KASSERT(em != NULL, ("exit: emuldata not found.\n")); 489 490 LINUX_CTR2(exit, "thread(%d) (%d)", em->em_tid, args->rval); 491 492 linux_thread_detach(td); 493 494 /* 495 * XXX. When the last two threads of a process 496 * exit via pthread_exit() try thr_exit() first. 497 */ 498 kern_thr_exit(td); 499 exit1(td, args->rval, 0); 500 /* NOTREACHED */ 501 } 502 503 int 504 linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args) 505 { 506 struct linux_emuldata *em; 507 508 em = em_find(td); 509 KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n")); 510 511 em->child_clear_tid = args->tidptr; 512 513 td->td_retval[0] = em->em_tid; 514 515 LINUX_CTR3(set_tid_address, "tidptr(%d) %p, returns %d", 516 em->em_tid, args->tidptr, td->td_retval[0]); 517 518 return (0); 519 } 520 521 void 522 linux_thread_detach(struct thread *td) 523 { 524 struct linux_emuldata *em; 525 int *child_clear_tid; 526 int error; 527 528 em = em_find(td); 529 KASSERT(em != NULL, ("thread_detach: emuldata not found.\n")); 530 531 LINUX_CTR1(thread_detach, "thread(%d)", em->em_tid); 532 533 release_futexes(td, em); 534 535 child_clear_tid = em->child_clear_tid; 536 537 if (child_clear_tid != NULL) { 538 LINUX_CTR2(thread_detach, "thread(%d) %p", 539 em->em_tid, child_clear_tid); 540 541 error = suword32(child_clear_tid, 0); 542 if (error != 0) 543 return; 544 545 error = futex_wake(td, child_clear_tid, 1, false); 546 /* 547 * this cannot happen at the moment and if this happens it 548 * probably means there is a user space bug 549 */ 550 if (error != 0) 551 linux_msg(td, "futex stuff in thread_detach failed."); 552 } 553 554 /* 555 * Do not rely on the robust list which is maintained by userspace, 556 * cleanup remaining pi (if any) after release_futexes anyway. 557 */ 558 umtx_thread_exit(td); 559 } 560