1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include "opt_ktrace.h" 30 #include "opt_posix.h" 31 #include "opt_hwpmc_hooks.h" 32 #include "opt_hwt_hooks.h" 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #ifdef KTRACE 36 #include <sys/ktrace.h> 37 #endif 38 #include <sys/limits.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/priv.h> 42 #include <sys/proc.h> 43 #include <sys/posix4.h> 44 #include <sys/ptrace.h> 45 #include <sys/racct.h> 46 #include <sys/resourcevar.h> 47 #include <sys/rtprio.h> 48 #include <sys/rwlock.h> 49 #include <sys/sched.h> 50 #include <sys/sysctl.h> 51 #include <sys/smp.h> 52 #include <sys/syscallsubr.h> 53 #include <sys/sysent.h> 54 #include <sys/sysproto.h> 55 #include <sys/signalvar.h> 56 #include <sys/sysctl.h> 57 #include <sys/thr.h> 58 #include <sys/ucontext.h> 59 #include <sys/umtxvar.h> 60 #ifdef HWPMC_HOOKS 61 #include <sys/pmckern.h> 62 #endif 63 #ifdef HWT_HOOKS 64 #include <dev/hwt/hwt_hook.h> 65 #endif 66 67 #include <machine/frame.h> 68 69 #include <security/audit/audit.h> 70 71 static SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 72 "thread allocation"); 73 74 int max_threads_per_proc = 1500; 75 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW, 76 &max_threads_per_proc, 0, "Limit on threads per proc"); 77 78 static int max_threads_hits; 79 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD, 80 &max_threads_hits, 0, "kern.threads.max_threads_per_proc hit count"); 81 82 #ifdef COMPAT_FREEBSD32 83 84 static inline int 85 suword_lwpid(void *addr, lwpid_t lwpid) 86 { 87 int error; 88 89 if (SV_CURPROC_FLAG(SV_LP64)) 90 error = suword(addr, lwpid); 91 else 92 error = suword32(addr, lwpid); 93 return (error); 94 } 95 96 #else 97 #define suword_lwpid suword 98 #endif 99 100 /* 101 * System call interface. 102 */ 103 104 struct thr_create_initthr_args { 105 ucontext_t ctx; 106 long *tid; 107 }; 108 109 static int 110 thr_create_initthr(struct thread *td, void *thunk) 111 { 112 struct thr_create_initthr_args *args; 113 114 /* Copy out the child tid. */ 115 args = thunk; 116 if (args->tid != NULL && suword_lwpid(args->tid, td->td_tid)) 117 return (EFAULT); 118 119 return (set_mcontext(td, &args->ctx.uc_mcontext)); 120 } 121 122 int 123 sys_thr_create(struct thread *td, struct thr_create_args *uap) 124 /* ucontext_t *ctx, long *id, int flags */ 125 { 126 struct thr_create_initthr_args args; 127 int error; 128 129 if ((error = copyin(uap->ctx, &args.ctx, sizeof(args.ctx)))) 130 return (error); 131 args.tid = uap->id; 132 return (thread_create(td, NULL, thr_create_initthr, &args)); 133 } 134 135 int 136 sys_thr_new(struct thread *td, struct thr_new_args *uap) 137 /* struct thr_param * */ 138 { 139 struct thr_param param; 140 int error; 141 142 if (uap->param_size < 0 || uap->param_size > sizeof(param)) 143 return (EINVAL); 144 bzero(¶m, sizeof(param)); 145 if ((error = copyin(uap->param, ¶m, uap->param_size))) 146 return (error); 147 return (kern_thr_new(td, ¶m)); 148 } 149 150 static int 151 thr_new_initthr(struct thread *td, void *thunk) 152 { 153 stack_t stack; 154 struct thr_param *param; 155 int error; 156 157 /* 158 * Here we copy out tid to two places, one for child and one 159 * for parent, because pthread can create a detached thread, 160 * if parent wants to safely access child tid, it has to provide 161 * its storage, because child thread may exit quickly and 162 * memory is freed before parent thread can access it. 163 */ 164 param = thunk; 165 if ((param->child_tid != NULL && 166 suword_lwpid(param->child_tid, td->td_tid)) || 167 (param->parent_tid != NULL && 168 suword_lwpid(param->parent_tid, td->td_tid))) 169 return (EFAULT); 170 171 /* Set up our machine context. */ 172 stack.ss_sp = param->stack_base; 173 stack.ss_size = param->stack_size; 174 /* Set upcall address to user thread entry function. */ 175 error = cpu_set_upcall(td, param->start_func, param->arg, &stack); 176 if (error != 0) 177 return (error); 178 /* Setup user TLS address and TLS pointer register. */ 179 return (cpu_set_user_tls(td, param->tls_base, param->flags)); 180 } 181 182 int 183 kern_thr_new(struct thread *td, struct thr_param *param) 184 { 185 struct rtprio rtp, *rtpp; 186 int error; 187 188 if ((param->flags & ~(THR_SUSPENDED | THR_SYSTEM_SCOPE | 189 THR_C_RUNTIME)) != 0) 190 return (EINVAL); 191 rtpp = NULL; 192 if (param->rtp != 0) { 193 error = copyin(param->rtp, &rtp, sizeof(struct rtprio)); 194 if (error) 195 return (error); 196 rtpp = &rtp; 197 } 198 #ifdef KTRACE 199 if (KTRPOINT(td, KTR_STRUCT)) 200 ktrthrparam(param); 201 #endif 202 return (thread_create(td, rtpp, thr_new_initthr, param)); 203 } 204 205 int 206 thread_create(struct thread *td, struct rtprio *rtp, 207 int (*initialize_thread)(struct thread *, void *), void *thunk) 208 { 209 struct thread *newtd; 210 struct proc *p; 211 int error; 212 213 p = td->td_proc; 214 215 if (rtp != NULL) { 216 switch(rtp->type) { 217 case RTP_PRIO_REALTIME: 218 case RTP_PRIO_FIFO: 219 /* Only root can set scheduler policy */ 220 if (priv_check(td, PRIV_SCHED_SETPOLICY) != 0) 221 return (EPERM); 222 if (rtp->prio > RTP_PRIO_MAX) 223 return (EINVAL); 224 break; 225 case RTP_PRIO_NORMAL: 226 rtp->prio = 0; 227 break; 228 default: 229 return (EINVAL); 230 } 231 } 232 233 #ifdef RACCT 234 if (racct_enable) { 235 PROC_LOCK(p); 236 error = racct_add(p, RACCT_NTHR, 1); 237 PROC_UNLOCK(p); 238 if (error != 0) 239 return (EPROCLIM); 240 } 241 #endif 242 243 /* Initialize our td */ 244 error = kern_thr_alloc(p, 0, &newtd); 245 if (error) 246 goto fail; 247 248 bzero(&newtd->td_startzero, 249 __rangeof(struct thread, td_startzero, td_endzero)); 250 bcopy(&td->td_startcopy, &newtd->td_startcopy, 251 __rangeof(struct thread, td_startcopy, td_endcopy)); 252 newtd->td_proc = td->td_proc; 253 newtd->td_rb_list = newtd->td_rbp_list = newtd->td_rb_inact = 0; 254 thread_cow_get(newtd, td); 255 256 cpu_copy_thread(newtd, td); 257 258 error = initialize_thread(newtd, thunk); 259 if (error != 0) { 260 thread_cow_free(newtd); 261 thread_free(newtd); 262 goto fail; 263 } 264 265 PROC_LOCK(p); 266 p->p_flag |= P_HADTHREADS; 267 thread_link(newtd, p); 268 bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name)); 269 thread_lock(td); 270 /* let the scheduler know about these things. */ 271 sched_fork_thread(td, newtd); 272 thread_unlock(td); 273 if (P_SHOULDSTOP(p)) 274 ast_sched(newtd, TDA_SUSPEND); 275 if (p->p_ptevents & PTRACE_LWP) 276 newtd->td_dbgflags |= TDB_BORN; 277 278 PROC_UNLOCK(p); 279 #ifdef HWPMC_HOOKS 280 if (PMC_PROC_IS_USING_PMCS(p)) 281 PMC_CALL_HOOK(newtd, PMC_FN_THR_CREATE, NULL); 282 else if (PMC_SYSTEM_SAMPLING_ACTIVE()) 283 PMC_CALL_HOOK_UNLOCKED(newtd, PMC_FN_THR_CREATE_LOG, NULL); 284 #endif 285 286 #ifdef HWT_HOOKS 287 HWT_CALL_HOOK(newtd, HWT_THREAD_CREATE, NULL); 288 #endif 289 290 tidhash_add(newtd); 291 292 /* ignore timesharing class */ 293 if (rtp != NULL && !(td->td_pri_class == PRI_TIMESHARE && 294 rtp->type == RTP_PRIO_NORMAL)) 295 rtp_to_pri(rtp, newtd); 296 297 thread_lock(newtd); 298 TD_SET_CAN_RUN(newtd); 299 sched_add(newtd, SRQ_BORING); 300 301 return (0); 302 303 fail: 304 #ifdef RACCT 305 if (racct_enable) { 306 PROC_LOCK(p); 307 racct_sub(p, RACCT_NTHR, 1); 308 PROC_UNLOCK(p); 309 } 310 #endif 311 return (error); 312 } 313 314 int 315 sys_thr_self(struct thread *td, struct thr_self_args *uap) 316 /* long *id */ 317 { 318 int error; 319 320 error = suword_lwpid(uap->id, (unsigned)td->td_tid); 321 if (error == -1) 322 return (EFAULT); 323 return (0); 324 } 325 326 int 327 sys_thr_exit(struct thread *td, struct thr_exit_args *uap) 328 /* long *state */ 329 { 330 331 umtx_thread_exit(td); 332 333 /* Signal userland that it can free the stack. */ 334 if ((void *)uap->state != NULL) { 335 (void)suword_lwpid(uap->state, 1); 336 (void)kern_umtx_wake(td, uap->state, INT_MAX, 0); 337 } 338 339 return (kern_thr_exit(td)); 340 } 341 342 int 343 kern_thr_exit(struct thread *td) 344 { 345 struct proc *p; 346 347 p = td->td_proc; 348 349 /* 350 * Clear kernel ASTs in advance of selecting the last exiting 351 * thread and acquiring schedulers locks. It is fine to 352 * clear the ASTs here even if we are not going to exit after 353 * all. On the other hand, leaving them pending could trigger 354 * execution in subsystems in a context where they are not 355 * prepared to handle top kernel actions, even in execution of 356 * an unrelated thread. 357 */ 358 ast_kclear(td); 359 360 /* 361 * If all of the threads in a process call this routine to 362 * exit (e.g. all threads call pthread_exit()), exactly one 363 * thread should return to the caller to terminate the process 364 * instead of the thread. 365 * 366 * Checking p_numthreads alone is not sufficient since threads 367 * might be committed to terminating while the PROC_LOCK is 368 * dropped in either ptracestop() or while removing this thread 369 * from the tidhash. Instead, the p_pendingexits field holds 370 * the count of threads in either of those states and a thread 371 * is considered the "last" thread if all of the other threads 372 * in a process are already terminating. 373 */ 374 PROC_LOCK(p); 375 if (p->p_numthreads == p->p_pendingexits + 1) { 376 /* 377 * Ignore attempts to shut down last thread in the 378 * proc. This will actually call _exit(2) in the 379 * usermode trampoline when it returns. 380 */ 381 PROC_UNLOCK(p); 382 return (0); 383 } 384 385 if (p->p_sysent->sv_ontdexit != NULL) 386 p->p_sysent->sv_ontdexit(td); 387 388 td->td_dbgflags |= TDB_EXIT; 389 if (p->p_ptevents & PTRACE_LWP) { 390 p->p_pendingexits++; 391 ptracestop(td, SIGTRAP, NULL); 392 p->p_pendingexits--; 393 } 394 tidhash_remove(td); 395 396 /* 397 * The check above should prevent all other threads from this 398 * process from exiting while the PROC_LOCK is dropped, so 399 * there must be at least one other thread other than the 400 * current thread. 401 */ 402 KASSERT(p->p_numthreads > 1, ("too few threads")); 403 racct_sub(p, RACCT_NTHR, 1); 404 tdsigcleanup(td); 405 406 #ifdef AUDIT 407 AUDIT_SYSCALL_EXIT(0, td); 408 #endif 409 410 PROC_SLOCK(p); 411 thread_stopped(p); 412 thread_exit(); 413 /* NOTREACHED */ 414 } 415 416 int 417 sys_thr_kill(struct thread *td, struct thr_kill_args *uap) 418 /* long id, int sig */ 419 { 420 ksiginfo_t ksi; 421 struct thread *ttd; 422 struct proc *p; 423 int error; 424 425 p = td->td_proc; 426 ksiginfo_init(&ksi); 427 ksi.ksi_signo = uap->sig; 428 ksi.ksi_code = SI_LWP; 429 ksi.ksi_pid = p->p_pid; 430 ksi.ksi_uid = td->td_ucred->cr_ruid; 431 if (uap->id == -1) { 432 if (uap->sig != 0 && !_SIG_VALID(uap->sig)) { 433 error = EINVAL; 434 } else { 435 error = ESRCH; 436 PROC_LOCK(p); 437 FOREACH_THREAD_IN_PROC(p, ttd) { 438 if (ttd != td) { 439 error = 0; 440 if (uap->sig == 0) 441 break; 442 tdksignal(ttd, uap->sig, &ksi); 443 } 444 } 445 PROC_UNLOCK(p); 446 } 447 } else { 448 error = 0; 449 ttd = tdfind((lwpid_t)uap->id, p->p_pid); 450 if (ttd == NULL) 451 return (ESRCH); 452 if (uap->sig == 0) 453 ; 454 else if (!_SIG_VALID(uap->sig)) 455 error = EINVAL; 456 else 457 tdksignal(ttd, uap->sig, &ksi); 458 PROC_UNLOCK(ttd->td_proc); 459 } 460 return (error); 461 } 462 463 int 464 sys_thr_kill2(struct thread *td, struct thr_kill2_args *uap) 465 /* pid_t pid, long id, int sig */ 466 { 467 ksiginfo_t ksi; 468 struct thread *ttd; 469 struct proc *p; 470 int error; 471 472 AUDIT_ARG_SIGNUM(uap->sig); 473 474 ksiginfo_init(&ksi); 475 ksi.ksi_signo = uap->sig; 476 ksi.ksi_code = SI_LWP; 477 ksi.ksi_pid = td->td_proc->p_pid; 478 ksi.ksi_uid = td->td_ucred->cr_ruid; 479 if (uap->id == -1) { 480 if ((p = pfind(uap->pid)) == NULL) 481 return (ESRCH); 482 AUDIT_ARG_PROCESS(p); 483 error = p_cansignal(td, p, uap->sig); 484 if (error) { 485 PROC_UNLOCK(p); 486 return (error); 487 } 488 if (uap->sig != 0 && !_SIG_VALID(uap->sig)) { 489 error = EINVAL; 490 } else { 491 error = ESRCH; 492 FOREACH_THREAD_IN_PROC(p, ttd) { 493 if (ttd != td) { 494 error = 0; 495 if (uap->sig == 0) 496 break; 497 tdksignal(ttd, uap->sig, &ksi); 498 } 499 } 500 } 501 PROC_UNLOCK(p); 502 } else { 503 ttd = tdfind((lwpid_t)uap->id, uap->pid); 504 if (ttd == NULL) 505 return (ESRCH); 506 p = ttd->td_proc; 507 AUDIT_ARG_PROCESS(p); 508 error = p_cansignal(td, p, uap->sig); 509 if (uap->sig == 0) 510 ; 511 else if (!_SIG_VALID(uap->sig)) 512 error = EINVAL; 513 else 514 tdksignal(ttd, uap->sig, &ksi); 515 PROC_UNLOCK(p); 516 } 517 return (error); 518 } 519 520 int 521 sys_thr_suspend(struct thread *td, struct thr_suspend_args *uap) 522 /* const struct timespec *timeout */ 523 { 524 struct timespec ts, *tsp; 525 int error; 526 527 tsp = NULL; 528 if (uap->timeout != NULL) { 529 error = umtx_copyin_timeout(uap->timeout, &ts); 530 if (error != 0) 531 return (error); 532 tsp = &ts; 533 } 534 535 return (kern_thr_suspend(td, tsp)); 536 } 537 538 int 539 kern_thr_suspend(struct thread *td, struct timespec *tsp) 540 { 541 struct proc *p = td->td_proc; 542 struct timeval tv; 543 int error = 0; 544 int timo = 0; 545 546 if (td->td_pflags & TDP_WAKEUP) { 547 td->td_pflags &= ~TDP_WAKEUP; 548 return (0); 549 } 550 551 if (tsp != NULL) { 552 if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) 553 error = EWOULDBLOCK; 554 else { 555 TIMESPEC_TO_TIMEVAL(&tv, tsp); 556 timo = tvtohz(&tv); 557 } 558 } 559 560 PROC_LOCK(p); 561 if (error == 0 && (td->td_flags & TDF_THRWAKEUP) == 0) 562 error = msleep((void *)td, &p->p_mtx, 563 PCATCH, "lthr", timo); 564 565 if (td->td_flags & TDF_THRWAKEUP) { 566 thread_lock(td); 567 td->td_flags &= ~TDF_THRWAKEUP; 568 thread_unlock(td); 569 PROC_UNLOCK(p); 570 return (0); 571 } 572 PROC_UNLOCK(p); 573 if (error == EWOULDBLOCK) 574 error = ETIMEDOUT; 575 else if (error == ERESTART) { 576 if (timo != 0) 577 error = EINTR; 578 } 579 return (error); 580 } 581 582 int 583 sys_thr_wake(struct thread *td, struct thr_wake_args *uap) 584 /* long id */ 585 { 586 struct proc *p; 587 struct thread *ttd; 588 589 if (uap->id == td->td_tid) { 590 td->td_pflags |= TDP_WAKEUP; 591 return (0); 592 } 593 594 p = td->td_proc; 595 ttd = tdfind((lwpid_t)uap->id, p->p_pid); 596 if (ttd == NULL) 597 return (ESRCH); 598 thread_lock(ttd); 599 ttd->td_flags |= TDF_THRWAKEUP; 600 thread_unlock(ttd); 601 wakeup((void *)ttd); 602 PROC_UNLOCK(p); 603 return (0); 604 } 605 606 int 607 sys_thr_set_name(struct thread *td, struct thr_set_name_args *uap) 608 { 609 struct proc *p; 610 char name[MAXCOMLEN + 1]; 611 struct thread *ttd; 612 int error; 613 614 error = 0; 615 name[0] = '\0'; 616 if (uap->name != NULL) { 617 error = copyinstr(uap->name, name, sizeof(name), NULL); 618 if (error == ENAMETOOLONG) { 619 error = copyin(uap->name, name, sizeof(name) - 1); 620 name[sizeof(name) - 1] = '\0'; 621 } 622 if (error) 623 return (error); 624 } 625 p = td->td_proc; 626 ttd = tdfind((lwpid_t)uap->id, p->p_pid); 627 if (ttd == NULL) 628 return (ESRCH); 629 strcpy(ttd->td_name, name); 630 #ifdef HWPMC_HOOKS 631 if (PMC_PROC_IS_USING_PMCS(p) || PMC_SYSTEM_SAMPLING_ACTIVE()) 632 PMC_CALL_HOOK_UNLOCKED(ttd, PMC_FN_THR_CREATE_LOG, NULL); 633 #endif 634 #ifdef HWT_HOOKS 635 HWT_CALL_HOOK(ttd, HWT_THREAD_SET_NAME, NULL); 636 #endif 637 #ifdef KTR 638 sched_clear_tdname(ttd); 639 #endif 640 PROC_UNLOCK(p); 641 return (error); 642 } 643 644 int 645 kern_thr_alloc(struct proc *p, int pages, struct thread **ntd) 646 { 647 648 /* Have race condition but it is cheap. */ 649 if (p->p_numthreads >= max_threads_per_proc) { 650 ++max_threads_hits; 651 return (EPROCLIM); 652 } 653 654 *ntd = thread_alloc(pages); 655 if (*ntd == NULL) 656 return (ENOMEM); 657 658 return (0); 659 } 660