1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include "opt_ktrace.h" 30 #include "opt_posix.h" 31 #include "opt_hwpmc_hooks.h" 32 #include "opt_hwt_hooks.h" 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #ifdef KTRACE 36 #include <sys/ktrace.h> 37 #endif 38 #include <sys/limits.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/priv.h> 42 #include <sys/proc.h> 43 #include <sys/posix4.h> 44 #include <sys/ptrace.h> 45 #include <sys/racct.h> 46 #include <sys/resourcevar.h> 47 #include <sys/rtprio.h> 48 #include <sys/rwlock.h> 49 #include <sys/sched.h> 50 #include <sys/sysctl.h> 51 #include <sys/smp.h> 52 #include <sys/syscallsubr.h> 53 #include <sys/sysent.h> 54 #include <sys/sysproto.h> 55 #include <sys/signalvar.h> 56 #include <sys/sysctl.h> 57 #include <sys/thr.h> 58 #include <sys/ucontext.h> 59 #include <sys/umtxvar.h> 60 #ifdef HWPMC_HOOKS 61 #include <sys/pmckern.h> 62 #endif 63 #ifdef HWT_HOOKS 64 #include <dev/hwt/hwt_hook.h> 65 #endif 66 67 #include <machine/frame.h> 68 69 #include <security/audit/audit.h> 70 71 static SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 72 "thread allocation"); 73 74 int max_threads_per_proc = 1500; 75 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW, 76 &max_threads_per_proc, 0, "Limit on threads per proc"); 77 78 static int max_threads_hits; 79 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD, 80 &max_threads_hits, 0, "kern.threads.max_threads_per_proc hit count"); 81 82 #ifdef COMPAT_FREEBSD32 83 84 static inline int 85 suword_lwpid(void *addr, lwpid_t lwpid) 86 { 87 int error; 88 89 if (SV_CURPROC_FLAG(SV_LP64)) 90 error = suword(addr, lwpid); 91 else 92 error = suword32(addr, lwpid); 93 return (error); 94 } 95 96 #else 97 #define suword_lwpid suword 98 #endif 99 100 /* 101 * System call interface. 102 */ 103 104 struct thr_create_initthr_args { 105 ucontext_t ctx; 106 long *tid; 107 }; 108 109 static int 110 thr_create_initthr(struct thread *td, void *thunk) 111 { 112 struct thr_create_initthr_args *args; 113 114 /* Copy out the child tid. */ 115 args = thunk; 116 if (args->tid != NULL && suword_lwpid(args->tid, td->td_tid)) 117 return (EFAULT); 118 119 return (set_mcontext(td, &args->ctx.uc_mcontext)); 120 } 121 122 int 123 sys_thr_create(struct thread *td, struct thr_create_args *uap) 124 /* ucontext_t *ctx, long *id, int flags */ 125 { 126 struct thr_create_initthr_args args; 127 int error; 128 129 if ((error = copyin(uap->ctx, &args.ctx, sizeof(args.ctx)))) 130 return (error); 131 args.tid = uap->id; 132 return (thread_create(td, NULL, thr_create_initthr, &args)); 133 } 134 135 int 136 sys_thr_new(struct thread *td, struct thr_new_args *uap) 137 /* struct thr_param * */ 138 { 139 struct thr_param param; 140 int error; 141 142 if (uap->param_size < 0 || uap->param_size > sizeof(param)) 143 return (EINVAL); 144 bzero(¶m, sizeof(param)); 145 if ((error = copyin(uap->param, ¶m, uap->param_size))) 146 return (error); 147 return (kern_thr_new(td, ¶m)); 148 } 149 150 static int 151 thr_new_initthr(struct thread *td, void *thunk) 152 { 153 stack_t stack; 154 struct thr_param *param; 155 int error; 156 157 /* 158 * Here we copy out tid to two places, one for child and one 159 * for parent, because pthread can create a detached thread, 160 * if parent wants to safely access child tid, it has to provide 161 * its storage, because child thread may exit quickly and 162 * memory is freed before parent thread can access it. 163 */ 164 param = thunk; 165 if ((param->child_tid != NULL && 166 suword_lwpid(param->child_tid, td->td_tid)) || 167 (param->parent_tid != NULL && 168 suword_lwpid(param->parent_tid, td->td_tid))) 169 return (EFAULT); 170 171 /* Set up our machine context. */ 172 stack.ss_sp = param->stack_base; 173 stack.ss_size = param->stack_size; 174 /* Set upcall address to user thread entry function. */ 175 error = cpu_set_upcall(td, param->start_func, param->arg, &stack); 176 if (error != 0) 177 return (error); 178 /* Setup user TLS address and TLS pointer register. */ 179 return (cpu_set_user_tls(td, param->tls_base, param->flags)); 180 } 181 182 int 183 kern_thr_new(struct thread *td, struct thr_param *param) 184 { 185 struct rtprio rtp, *rtpp; 186 int error; 187 188 if ((param->flags & ~(THR_SUSPENDED | THR_SYSTEM_SCOPE | 189 THR_C_RUNTIME)) != 0) 190 return (EINVAL); 191 rtpp = NULL; 192 if (param->rtp != 0) { 193 error = copyin(param->rtp, &rtp, sizeof(struct rtprio)); 194 if (error) 195 return (error); 196 rtpp = &rtp; 197 } 198 #ifdef KTRACE 199 if (KTRPOINT(td, KTR_STRUCT)) 200 ktrthrparam(param); 201 #endif 202 return (thread_create(td, rtpp, thr_new_initthr, param)); 203 } 204 205 int 206 thread_create(struct thread *td, struct rtprio *rtp, 207 int (*initialize_thread)(struct thread *, void *), void *thunk) 208 { 209 struct thread *newtd; 210 struct proc *p; 211 int error; 212 213 p = td->td_proc; 214 215 if (rtp != NULL) { 216 switch(rtp->type) { 217 case RTP_PRIO_REALTIME: 218 case RTP_PRIO_FIFO: 219 /* Only root can set scheduler policy */ 220 if (priv_check(td, PRIV_SCHED_SETPOLICY) != 0) 221 return (EPERM); 222 if (rtp->prio > RTP_PRIO_MAX) 223 return (EINVAL); 224 break; 225 case RTP_PRIO_NORMAL: 226 rtp->prio = 0; 227 break; 228 default: 229 return (EINVAL); 230 } 231 } 232 233 #ifdef RACCT 234 if (racct_enable) { 235 PROC_LOCK(p); 236 error = racct_add(p, RACCT_NTHR, 1); 237 PROC_UNLOCK(p); 238 if (error != 0) 239 return (EPROCLIM); 240 } 241 #endif 242 243 /* Initialize our td */ 244 error = kern_thr_alloc(p, 0, &newtd); 245 if (error) 246 goto fail; 247 248 bzero(&newtd->td_startzero, 249 __rangeof(struct thread, td_startzero, td_endzero)); 250 bcopy(&td->td_startcopy, &newtd->td_startcopy, 251 __rangeof(struct thread, td_startcopy, td_endcopy)); 252 newtd->td_proc = td->td_proc; 253 newtd->td_rb_list = newtd->td_rbp_list = newtd->td_rb_inact = 0; 254 thread_cow_get(newtd, td); 255 256 cpu_copy_thread(newtd, td); 257 258 error = initialize_thread(newtd, thunk); 259 if (error != 0) { 260 thread_cow_free(newtd); 261 thread_free(newtd); 262 goto fail; 263 } 264 265 PROC_LOCK(p); 266 p->p_flag |= P_HADTHREADS; 267 thread_link(newtd, p); 268 bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name)); 269 thread_lock(td); 270 /* let the scheduler know about these things. */ 271 sched_fork_thread(td, newtd); 272 thread_unlock(td); 273 if (P_SHOULDSTOP(p)) 274 ast_sched(newtd, TDA_SUSPEND); 275 if (p->p_ptevents & PTRACE_LWP) 276 newtd->td_dbgflags |= TDB_BORN; 277 278 PROC_UNLOCK(p); 279 #ifdef HWPMC_HOOKS 280 if (PMC_PROC_IS_USING_PMCS(p)) 281 PMC_CALL_HOOK(newtd, PMC_FN_THR_CREATE, NULL); 282 else if (PMC_SYSTEM_SAMPLING_ACTIVE()) 283 PMC_CALL_HOOK_UNLOCKED(newtd, PMC_FN_THR_CREATE_LOG, NULL); 284 #endif 285 286 #ifdef HWT_HOOKS 287 HWT_CALL_HOOK(newtd, HWT_THREAD_CREATE, NULL); 288 #endif 289 290 tidhash_add(newtd); 291 292 /* ignore timesharing class */ 293 if (rtp != NULL && !(td->td_pri_class == PRI_TIMESHARE && 294 rtp->type == RTP_PRIO_NORMAL)) 295 rtp_to_pri(rtp, newtd); 296 297 thread_lock(newtd); 298 TD_SET_CAN_RUN(newtd); 299 sched_add(newtd, SRQ_BORING); 300 301 return (0); 302 303 fail: 304 #ifdef RACCT 305 if (racct_enable) { 306 PROC_LOCK(p); 307 racct_sub(p, RACCT_NTHR, 1); 308 PROC_UNLOCK(p); 309 } 310 #endif 311 return (error); 312 } 313 314 int 315 sys_thr_self(struct thread *td, struct thr_self_args *uap) 316 /* long *id */ 317 { 318 int error; 319 320 error = suword_lwpid(uap->id, (unsigned)td->td_tid); 321 if (error == -1) 322 return (EFAULT); 323 return (0); 324 } 325 326 int 327 sys_thr_exit(struct thread *td, struct thr_exit_args *uap) 328 /* long *state */ 329 { 330 331 umtx_thread_exit(td); 332 333 /* Signal userland that it can free the stack. */ 334 if ((void *)uap->state != NULL) { 335 (void)suword_lwpid(uap->state, 1); 336 (void)kern_umtx_wake(td, uap->state, INT_MAX, 0); 337 } 338 339 return (kern_thr_exit(td)); 340 } 341 342 int 343 kern_thr_exit(struct thread *td) 344 { 345 struct proc *p; 346 347 p = td->td_proc; 348 349 /* 350 * If all of the threads in a process call this routine to 351 * exit (e.g. all threads call pthread_exit()), exactly one 352 * thread should return to the caller to terminate the process 353 * instead of the thread. 354 * 355 * Checking p_numthreads alone is not sufficient since threads 356 * might be committed to terminating while the PROC_LOCK is 357 * dropped in either ptracestop() or while removing this thread 358 * from the tidhash. Instead, the p_pendingexits field holds 359 * the count of threads in either of those states and a thread 360 * is considered the "last" thread if all of the other threads 361 * in a process are already terminating. 362 */ 363 PROC_LOCK(p); 364 if (p->p_numthreads == p->p_pendingexits + 1) { 365 /* 366 * Ignore attempts to shut down last thread in the 367 * proc. This will actually call _exit(2) in the 368 * usermode trampoline when it returns. 369 */ 370 PROC_UNLOCK(p); 371 return (0); 372 } 373 374 if (p->p_sysent->sv_ontdexit != NULL) 375 p->p_sysent->sv_ontdexit(td); 376 377 td->td_dbgflags |= TDB_EXIT; 378 if (p->p_ptevents & PTRACE_LWP) { 379 p->p_pendingexits++; 380 ptracestop(td, SIGTRAP, NULL); 381 p->p_pendingexits--; 382 } 383 tidhash_remove(td); 384 385 /* 386 * The check above should prevent all other threads from this 387 * process from exiting while the PROC_LOCK is dropped, so 388 * there must be at least one other thread other than the 389 * current thread. 390 */ 391 KASSERT(p->p_numthreads > 1, ("too few threads")); 392 racct_sub(p, RACCT_NTHR, 1); 393 tdsigcleanup(td); 394 395 #ifdef AUDIT 396 AUDIT_SYSCALL_EXIT(0, td); 397 #endif 398 399 PROC_SLOCK(p); 400 thread_stopped(p); 401 thread_exit(); 402 /* NOTREACHED */ 403 } 404 405 int 406 sys_thr_kill(struct thread *td, struct thr_kill_args *uap) 407 /* long id, int sig */ 408 { 409 ksiginfo_t ksi; 410 struct thread *ttd; 411 struct proc *p; 412 int error; 413 414 p = td->td_proc; 415 ksiginfo_init(&ksi); 416 ksi.ksi_signo = uap->sig; 417 ksi.ksi_code = SI_LWP; 418 ksi.ksi_pid = p->p_pid; 419 ksi.ksi_uid = td->td_ucred->cr_ruid; 420 if (uap->id == -1) { 421 if (uap->sig != 0 && !_SIG_VALID(uap->sig)) { 422 error = EINVAL; 423 } else { 424 error = ESRCH; 425 PROC_LOCK(p); 426 FOREACH_THREAD_IN_PROC(p, ttd) { 427 if (ttd != td) { 428 error = 0; 429 if (uap->sig == 0) 430 break; 431 tdksignal(ttd, uap->sig, &ksi); 432 } 433 } 434 PROC_UNLOCK(p); 435 } 436 } else { 437 error = 0; 438 ttd = tdfind((lwpid_t)uap->id, p->p_pid); 439 if (ttd == NULL) 440 return (ESRCH); 441 if (uap->sig == 0) 442 ; 443 else if (!_SIG_VALID(uap->sig)) 444 error = EINVAL; 445 else 446 tdksignal(ttd, uap->sig, &ksi); 447 PROC_UNLOCK(ttd->td_proc); 448 } 449 return (error); 450 } 451 452 int 453 sys_thr_kill2(struct thread *td, struct thr_kill2_args *uap) 454 /* pid_t pid, long id, int sig */ 455 { 456 ksiginfo_t ksi; 457 struct thread *ttd; 458 struct proc *p; 459 int error; 460 461 AUDIT_ARG_SIGNUM(uap->sig); 462 463 ksiginfo_init(&ksi); 464 ksi.ksi_signo = uap->sig; 465 ksi.ksi_code = SI_LWP; 466 ksi.ksi_pid = td->td_proc->p_pid; 467 ksi.ksi_uid = td->td_ucred->cr_ruid; 468 if (uap->id == -1) { 469 if ((p = pfind(uap->pid)) == NULL) 470 return (ESRCH); 471 AUDIT_ARG_PROCESS(p); 472 error = p_cansignal(td, p, uap->sig); 473 if (error) { 474 PROC_UNLOCK(p); 475 return (error); 476 } 477 if (uap->sig != 0 && !_SIG_VALID(uap->sig)) { 478 error = EINVAL; 479 } else { 480 error = ESRCH; 481 FOREACH_THREAD_IN_PROC(p, ttd) { 482 if (ttd != td) { 483 error = 0; 484 if (uap->sig == 0) 485 break; 486 tdksignal(ttd, uap->sig, &ksi); 487 } 488 } 489 } 490 PROC_UNLOCK(p); 491 } else { 492 ttd = tdfind((lwpid_t)uap->id, uap->pid); 493 if (ttd == NULL) 494 return (ESRCH); 495 p = ttd->td_proc; 496 AUDIT_ARG_PROCESS(p); 497 error = p_cansignal(td, p, uap->sig); 498 if (uap->sig == 0) 499 ; 500 else if (!_SIG_VALID(uap->sig)) 501 error = EINVAL; 502 else 503 tdksignal(ttd, uap->sig, &ksi); 504 PROC_UNLOCK(p); 505 } 506 return (error); 507 } 508 509 int 510 sys_thr_suspend(struct thread *td, struct thr_suspend_args *uap) 511 /* const struct timespec *timeout */ 512 { 513 struct timespec ts, *tsp; 514 int error; 515 516 tsp = NULL; 517 if (uap->timeout != NULL) { 518 error = umtx_copyin_timeout(uap->timeout, &ts); 519 if (error != 0) 520 return (error); 521 tsp = &ts; 522 } 523 524 return (kern_thr_suspend(td, tsp)); 525 } 526 527 int 528 kern_thr_suspend(struct thread *td, struct timespec *tsp) 529 { 530 struct proc *p = td->td_proc; 531 struct timeval tv; 532 int error = 0; 533 int timo = 0; 534 535 if (td->td_pflags & TDP_WAKEUP) { 536 td->td_pflags &= ~TDP_WAKEUP; 537 return (0); 538 } 539 540 if (tsp != NULL) { 541 if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) 542 error = EWOULDBLOCK; 543 else { 544 TIMESPEC_TO_TIMEVAL(&tv, tsp); 545 timo = tvtohz(&tv); 546 } 547 } 548 549 PROC_LOCK(p); 550 if (error == 0 && (td->td_flags & TDF_THRWAKEUP) == 0) 551 error = msleep((void *)td, &p->p_mtx, 552 PCATCH, "lthr", timo); 553 554 if (td->td_flags & TDF_THRWAKEUP) { 555 thread_lock(td); 556 td->td_flags &= ~TDF_THRWAKEUP; 557 thread_unlock(td); 558 PROC_UNLOCK(p); 559 return (0); 560 } 561 PROC_UNLOCK(p); 562 if (error == EWOULDBLOCK) 563 error = ETIMEDOUT; 564 else if (error == ERESTART) { 565 if (timo != 0) 566 error = EINTR; 567 } 568 return (error); 569 } 570 571 int 572 sys_thr_wake(struct thread *td, struct thr_wake_args *uap) 573 /* long id */ 574 { 575 struct proc *p; 576 struct thread *ttd; 577 578 if (uap->id == td->td_tid) { 579 td->td_pflags |= TDP_WAKEUP; 580 return (0); 581 } 582 583 p = td->td_proc; 584 ttd = tdfind((lwpid_t)uap->id, p->p_pid); 585 if (ttd == NULL) 586 return (ESRCH); 587 thread_lock(ttd); 588 ttd->td_flags |= TDF_THRWAKEUP; 589 thread_unlock(ttd); 590 wakeup((void *)ttd); 591 PROC_UNLOCK(p); 592 return (0); 593 } 594 595 int 596 sys_thr_set_name(struct thread *td, struct thr_set_name_args *uap) 597 { 598 struct proc *p; 599 char name[MAXCOMLEN + 1]; 600 struct thread *ttd; 601 int error; 602 603 error = 0; 604 name[0] = '\0'; 605 if (uap->name != NULL) { 606 error = copyinstr(uap->name, name, sizeof(name), NULL); 607 if (error == ENAMETOOLONG) { 608 error = copyin(uap->name, name, sizeof(name) - 1); 609 name[sizeof(name) - 1] = '\0'; 610 } 611 if (error) 612 return (error); 613 } 614 p = td->td_proc; 615 ttd = tdfind((lwpid_t)uap->id, p->p_pid); 616 if (ttd == NULL) 617 return (ESRCH); 618 strcpy(ttd->td_name, name); 619 #ifdef HWPMC_HOOKS 620 if (PMC_PROC_IS_USING_PMCS(p) || PMC_SYSTEM_SAMPLING_ACTIVE()) 621 PMC_CALL_HOOK_UNLOCKED(ttd, PMC_FN_THR_CREATE_LOG, NULL); 622 #endif 623 #ifdef HWT_HOOKS 624 HWT_CALL_HOOK(ttd, HWT_THREAD_SET_NAME, NULL); 625 #endif 626 #ifdef KTR 627 sched_clear_tdname(ttd); 628 #endif 629 PROC_UNLOCK(p); 630 return (error); 631 } 632 633 int 634 kern_thr_alloc(struct proc *p, int pages, struct thread **ntd) 635 { 636 637 /* Have race condition but it is cheap. */ 638 if (p->p_numthreads >= max_threads_per_proc) { 639 ++max_threads_hits; 640 return (EPROCLIM); 641 } 642 643 *ntd = thread_alloc(pages); 644 if (*ntd == NULL) 645 return (ENOMEM); 646 647 return (0); 648 } 649