1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_compat.h" 33 #include "opt_posix.h" 34 #include <sys/param.h> 35 #include <sys/kernel.h> 36 #include <sys/lock.h> 37 #include <sys/mutex.h> 38 #include <sys/priv.h> 39 #include <sys/proc.h> 40 #include <sys/posix4.h> 41 #include <sys/ptrace.h> 42 #include <sys/racct.h> 43 #include <sys/resourcevar.h> 44 #include <sys/rwlock.h> 45 #include <sys/sched.h> 46 #include <sys/sysctl.h> 47 #include <sys/smp.h> 48 #include <sys/syscallsubr.h> 49 #include <sys/sysent.h> 50 #include <sys/systm.h> 51 #include <sys/sysproto.h> 52 #include <sys/signalvar.h> 53 #include <sys/sysctl.h> 54 #include <sys/ucontext.h> 55 #include <sys/thr.h> 56 #include <sys/rtprio.h> 57 #include <sys/umtx.h> 58 #include <sys/limits.h> 59 60 #include <vm/vm_domain.h> 61 62 #include <machine/frame.h> 63 64 #include <security/audit/audit.h> 65 66 static SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, 67 "thread allocation"); 68 69 static int max_threads_per_proc = 1500; 70 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW, 71 &max_threads_per_proc, 0, "Limit on threads per proc"); 72 73 static int max_threads_hits; 74 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD, 75 &max_threads_hits, 0, "kern.threads.max_threads_per_proc hit count"); 76 77 #ifdef COMPAT_FREEBSD32 78 79 static inline int 80 suword_lwpid(void *addr, lwpid_t lwpid) 81 { 82 int error; 83 84 if (SV_CURPROC_FLAG(SV_LP64)) 85 error = suword(addr, lwpid); 86 else 87 error = suword32(addr, lwpid); 88 return (error); 89 } 90 91 #else 92 #define suword_lwpid suword 93 #endif 94 95 /* 96 * System call interface. 97 */ 98 99 struct thr_create_initthr_args { 100 ucontext_t ctx; 101 long *tid; 102 }; 103 104 static int 105 thr_create_initthr(struct thread *td, void *thunk) 106 { 107 struct thr_create_initthr_args *args; 108 109 /* Copy out the child tid. */ 110 args = thunk; 111 if (args->tid != NULL && suword_lwpid(args->tid, td->td_tid)) 112 return (EFAULT); 113 114 return (set_mcontext(td, &args->ctx.uc_mcontext)); 115 } 116 117 int 118 sys_thr_create(struct thread *td, struct thr_create_args *uap) 119 /* ucontext_t *ctx, long *id, int flags */ 120 { 121 struct thr_create_initthr_args args; 122 int error; 123 124 if ((error = copyin(uap->ctx, &args.ctx, sizeof(args.ctx)))) 125 return (error); 126 args.tid = uap->id; 127 return (thread_create(td, NULL, thr_create_initthr, &args)); 128 } 129 130 int 131 sys_thr_new(struct thread *td, struct thr_new_args *uap) 132 /* struct thr_param * */ 133 { 134 struct thr_param param; 135 int error; 136 137 if (uap->param_size < 0 || uap->param_size > sizeof(param)) 138 return (EINVAL); 139 bzero(¶m, sizeof(param)); 140 if ((error = copyin(uap->param, ¶m, uap->param_size))) 141 return (error); 142 return (kern_thr_new(td, ¶m)); 143 } 144 145 static int 146 thr_new_initthr(struct thread *td, void *thunk) 147 { 148 stack_t stack; 149 struct thr_param *param; 150 151 /* 152 * Here we copy out tid to two places, one for child and one 153 * for parent, because pthread can create a detached thread, 154 * if parent wants to safely access child tid, it has to provide 155 * its storage, because child thread may exit quickly and 156 * memory is freed before parent thread can access it. 157 */ 158 param = thunk; 159 if ((param->child_tid != NULL && 160 suword_lwpid(param->child_tid, td->td_tid)) || 161 (param->parent_tid != NULL && 162 suword_lwpid(param->parent_tid, td->td_tid))) 163 return (EFAULT); 164 165 /* Set up our machine context. */ 166 stack.ss_sp = param->stack_base; 167 stack.ss_size = param->stack_size; 168 /* Set upcall address to user thread entry function. */ 169 cpu_set_upcall(td, param->start_func, param->arg, &stack); 170 /* Setup user TLS address and TLS pointer register. */ 171 return (cpu_set_user_tls(td, param->tls_base)); 172 } 173 174 int 175 kern_thr_new(struct thread *td, struct thr_param *param) 176 { 177 struct rtprio rtp, *rtpp; 178 int error; 179 180 rtpp = NULL; 181 if (param->rtp != 0) { 182 error = copyin(param->rtp, &rtp, sizeof(struct rtprio)); 183 if (error) 184 return (error); 185 rtpp = &rtp; 186 } 187 return (thread_create(td, rtpp, thr_new_initthr, param)); 188 } 189 190 int 191 thread_create(struct thread *td, struct rtprio *rtp, 192 int (*initialize_thread)(struct thread *, void *), void *thunk) 193 { 194 struct thread *newtd; 195 struct proc *p; 196 int error; 197 198 p = td->td_proc; 199 200 if (rtp != NULL) { 201 switch(rtp->type) { 202 case RTP_PRIO_REALTIME: 203 case RTP_PRIO_FIFO: 204 /* Only root can set scheduler policy */ 205 if (priv_check(td, PRIV_SCHED_SETPOLICY) != 0) 206 return (EPERM); 207 if (rtp->prio > RTP_PRIO_MAX) 208 return (EINVAL); 209 break; 210 case RTP_PRIO_NORMAL: 211 rtp->prio = 0; 212 break; 213 default: 214 return (EINVAL); 215 } 216 } 217 218 #ifdef RACCT 219 if (racct_enable) { 220 PROC_LOCK(p); 221 error = racct_add(p, RACCT_NTHR, 1); 222 PROC_UNLOCK(p); 223 if (error != 0) 224 return (EPROCLIM); 225 } 226 #endif 227 228 /* Initialize our td */ 229 error = kern_thr_alloc(p, 0, &newtd); 230 if (error) 231 goto fail; 232 233 cpu_copy_thread(newtd, td); 234 235 bzero(&newtd->td_startzero, 236 __rangeof(struct thread, td_startzero, td_endzero)); 237 bcopy(&td->td_startcopy, &newtd->td_startcopy, 238 __rangeof(struct thread, td_startcopy, td_endcopy)); 239 newtd->td_proc = td->td_proc; 240 newtd->td_rb_list = newtd->td_rbp_list = newtd->td_rb_inact = 0; 241 thread_cow_get(newtd, td); 242 243 error = initialize_thread(newtd, thunk); 244 if (error != 0) { 245 thread_cow_free(newtd); 246 thread_free(newtd); 247 goto fail; 248 } 249 250 PROC_LOCK(p); 251 p->p_flag |= P_HADTHREADS; 252 thread_link(newtd, p); 253 bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name)); 254 thread_lock(td); 255 /* let the scheduler know about these things. */ 256 sched_fork_thread(td, newtd); 257 thread_unlock(td); 258 if (P_SHOULDSTOP(p)) 259 newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; 260 if (p->p_ptevents & PTRACE_LWP) 261 newtd->td_dbgflags |= TDB_BORN; 262 263 /* 264 * Copy the existing thread VM policy into the new thread. 265 */ 266 vm_domain_policy_localcopy(&newtd->td_vm_dom_policy, 267 &td->td_vm_dom_policy); 268 269 PROC_UNLOCK(p); 270 271 tidhash_add(newtd); 272 273 thread_lock(newtd); 274 if (rtp != NULL) { 275 if (!(td->td_pri_class == PRI_TIMESHARE && 276 rtp->type == RTP_PRIO_NORMAL)) { 277 rtp_to_pri(rtp, newtd); 278 sched_prio(newtd, newtd->td_user_pri); 279 } /* ignore timesharing class */ 280 } 281 TD_SET_CAN_RUN(newtd); 282 sched_add(newtd, SRQ_BORING); 283 thread_unlock(newtd); 284 285 return (0); 286 287 fail: 288 #ifdef RACCT 289 if (racct_enable) { 290 PROC_LOCK(p); 291 racct_sub(p, RACCT_NTHR, 1); 292 PROC_UNLOCK(p); 293 } 294 #endif 295 return (error); 296 } 297 298 int 299 sys_thr_self(struct thread *td, struct thr_self_args *uap) 300 /* long *id */ 301 { 302 int error; 303 304 error = suword_lwpid(uap->id, (unsigned)td->td_tid); 305 if (error == -1) 306 return (EFAULT); 307 return (0); 308 } 309 310 int 311 sys_thr_exit(struct thread *td, struct thr_exit_args *uap) 312 /* long *state */ 313 { 314 315 umtx_thread_exit(td); 316 317 /* Signal userland that it can free the stack. */ 318 if ((void *)uap->state != NULL) { 319 suword_lwpid(uap->state, 1); 320 kern_umtx_wake(td, uap->state, INT_MAX, 0); 321 } 322 323 return (kern_thr_exit(td)); 324 } 325 326 int 327 kern_thr_exit(struct thread *td) 328 { 329 struct proc *p; 330 331 p = td->td_proc; 332 333 /* 334 * If all of the threads in a process call this routine to 335 * exit (e.g. all threads call pthread_exit()), exactly one 336 * thread should return to the caller to terminate the process 337 * instead of the thread. 338 * 339 * Checking p_numthreads alone is not sufficient since threads 340 * might be committed to terminating while the PROC_LOCK is 341 * dropped in either ptracestop() or while removing this thread 342 * from the tidhash. Instead, the p_pendingexits field holds 343 * the count of threads in either of those states and a thread 344 * is considered the "last" thread if all of the other threads 345 * in a process are already terminating. 346 */ 347 PROC_LOCK(p); 348 if (p->p_numthreads == p->p_pendingexits + 1) { 349 /* 350 * Ignore attempts to shut down last thread in the 351 * proc. This will actually call _exit(2) in the 352 * usermode trampoline when it returns. 353 */ 354 PROC_UNLOCK(p); 355 return (0); 356 } 357 358 p->p_pendingexits++; 359 td->td_dbgflags |= TDB_EXIT; 360 if (p->p_ptevents & PTRACE_LWP) 361 ptracestop(td, SIGTRAP, NULL); 362 PROC_UNLOCK(p); 363 tidhash_remove(td); 364 PROC_LOCK(p); 365 p->p_pendingexits--; 366 367 /* 368 * The check above should prevent all other threads from this 369 * process from exiting while the PROC_LOCK is dropped, so 370 * there must be at least one other thread other than the 371 * current thread. 372 */ 373 KASSERT(p->p_numthreads > 1, ("too few threads")); 374 racct_sub(p, RACCT_NTHR, 1); 375 tdsigcleanup(td); 376 PROC_SLOCK(p); 377 thread_stopped(p); 378 thread_exit(); 379 /* NOTREACHED */ 380 } 381 382 int 383 sys_thr_kill(struct thread *td, struct thr_kill_args *uap) 384 /* long id, int sig */ 385 { 386 ksiginfo_t ksi; 387 struct thread *ttd; 388 struct proc *p; 389 int error; 390 391 p = td->td_proc; 392 ksiginfo_init(&ksi); 393 ksi.ksi_signo = uap->sig; 394 ksi.ksi_code = SI_LWP; 395 ksi.ksi_pid = p->p_pid; 396 ksi.ksi_uid = td->td_ucred->cr_ruid; 397 if (uap->id == -1) { 398 if (uap->sig != 0 && !_SIG_VALID(uap->sig)) { 399 error = EINVAL; 400 } else { 401 error = ESRCH; 402 PROC_LOCK(p); 403 FOREACH_THREAD_IN_PROC(p, ttd) { 404 if (ttd != td) { 405 error = 0; 406 if (uap->sig == 0) 407 break; 408 tdksignal(ttd, uap->sig, &ksi); 409 } 410 } 411 PROC_UNLOCK(p); 412 } 413 } else { 414 error = 0; 415 ttd = tdfind((lwpid_t)uap->id, p->p_pid); 416 if (ttd == NULL) 417 return (ESRCH); 418 if (uap->sig == 0) 419 ; 420 else if (!_SIG_VALID(uap->sig)) 421 error = EINVAL; 422 else 423 tdksignal(ttd, uap->sig, &ksi); 424 PROC_UNLOCK(ttd->td_proc); 425 } 426 return (error); 427 } 428 429 int 430 sys_thr_kill2(struct thread *td, struct thr_kill2_args *uap) 431 /* pid_t pid, long id, int sig */ 432 { 433 ksiginfo_t ksi; 434 struct thread *ttd; 435 struct proc *p; 436 int error; 437 438 AUDIT_ARG_SIGNUM(uap->sig); 439 440 ksiginfo_init(&ksi); 441 ksi.ksi_signo = uap->sig; 442 ksi.ksi_code = SI_LWP; 443 ksi.ksi_pid = td->td_proc->p_pid; 444 ksi.ksi_uid = td->td_ucred->cr_ruid; 445 if (uap->id == -1) { 446 if ((p = pfind(uap->pid)) == NULL) 447 return (ESRCH); 448 AUDIT_ARG_PROCESS(p); 449 error = p_cansignal(td, p, uap->sig); 450 if (error) { 451 PROC_UNLOCK(p); 452 return (error); 453 } 454 if (uap->sig != 0 && !_SIG_VALID(uap->sig)) { 455 error = EINVAL; 456 } else { 457 error = ESRCH; 458 FOREACH_THREAD_IN_PROC(p, ttd) { 459 if (ttd != td) { 460 error = 0; 461 if (uap->sig == 0) 462 break; 463 tdksignal(ttd, uap->sig, &ksi); 464 } 465 } 466 } 467 PROC_UNLOCK(p); 468 } else { 469 ttd = tdfind((lwpid_t)uap->id, uap->pid); 470 if (ttd == NULL) 471 return (ESRCH); 472 p = ttd->td_proc; 473 AUDIT_ARG_PROCESS(p); 474 error = p_cansignal(td, p, uap->sig); 475 if (uap->sig == 0) 476 ; 477 else if (!_SIG_VALID(uap->sig)) 478 error = EINVAL; 479 else 480 tdksignal(ttd, uap->sig, &ksi); 481 PROC_UNLOCK(p); 482 } 483 return (error); 484 } 485 486 int 487 sys_thr_suspend(struct thread *td, struct thr_suspend_args *uap) 488 /* const struct timespec *timeout */ 489 { 490 struct timespec ts, *tsp; 491 int error; 492 493 tsp = NULL; 494 if (uap->timeout != NULL) { 495 error = umtx_copyin_timeout(uap->timeout, &ts); 496 if (error != 0) 497 return (error); 498 tsp = &ts; 499 } 500 501 return (kern_thr_suspend(td, tsp)); 502 } 503 504 int 505 kern_thr_suspend(struct thread *td, struct timespec *tsp) 506 { 507 struct proc *p = td->td_proc; 508 struct timeval tv; 509 int error = 0; 510 int timo = 0; 511 512 if (td->td_pflags & TDP_WAKEUP) { 513 td->td_pflags &= ~TDP_WAKEUP; 514 return (0); 515 } 516 517 if (tsp != NULL) { 518 if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) 519 error = EWOULDBLOCK; 520 else { 521 TIMESPEC_TO_TIMEVAL(&tv, tsp); 522 timo = tvtohz(&tv); 523 } 524 } 525 526 PROC_LOCK(p); 527 if (error == 0 && (td->td_flags & TDF_THRWAKEUP) == 0) 528 error = msleep((void *)td, &p->p_mtx, 529 PCATCH, "lthr", timo); 530 531 if (td->td_flags & TDF_THRWAKEUP) { 532 thread_lock(td); 533 td->td_flags &= ~TDF_THRWAKEUP; 534 thread_unlock(td); 535 PROC_UNLOCK(p); 536 return (0); 537 } 538 PROC_UNLOCK(p); 539 if (error == EWOULDBLOCK) 540 error = ETIMEDOUT; 541 else if (error == ERESTART) { 542 if (timo != 0) 543 error = EINTR; 544 } 545 return (error); 546 } 547 548 int 549 sys_thr_wake(struct thread *td, struct thr_wake_args *uap) 550 /* long id */ 551 { 552 struct proc *p; 553 struct thread *ttd; 554 555 if (uap->id == td->td_tid) { 556 td->td_pflags |= TDP_WAKEUP; 557 return (0); 558 } 559 560 p = td->td_proc; 561 ttd = tdfind((lwpid_t)uap->id, p->p_pid); 562 if (ttd == NULL) 563 return (ESRCH); 564 thread_lock(ttd); 565 ttd->td_flags |= TDF_THRWAKEUP; 566 thread_unlock(ttd); 567 wakeup((void *)ttd); 568 PROC_UNLOCK(p); 569 return (0); 570 } 571 572 int 573 sys_thr_set_name(struct thread *td, struct thr_set_name_args *uap) 574 { 575 struct proc *p; 576 char name[MAXCOMLEN + 1]; 577 struct thread *ttd; 578 int error; 579 580 error = 0; 581 name[0] = '\0'; 582 if (uap->name != NULL) { 583 error = copyinstr(uap->name, name, sizeof(name), NULL); 584 if (error == ENAMETOOLONG) { 585 error = copyin(uap->name, name, sizeof(name) - 1); 586 name[sizeof(name) - 1] = '\0'; 587 } 588 if (error) 589 return (error); 590 } 591 p = td->td_proc; 592 ttd = tdfind((lwpid_t)uap->id, p->p_pid); 593 if (ttd == NULL) 594 return (ESRCH); 595 strcpy(ttd->td_name, name); 596 #ifdef KTR 597 sched_clear_tdname(ttd); 598 #endif 599 PROC_UNLOCK(p); 600 return (error); 601 } 602 603 int 604 kern_thr_alloc(struct proc *p, int pages, struct thread **ntd) 605 { 606 607 /* Have race condition but it is cheap. */ 608 if (p->p_numthreads >= max_threads_per_proc) { 609 ++max_threads_hits; 610 return (EPROCLIM); 611 } 612 613 *ntd = thread_alloc(pages); 614 if (*ntd == NULL) 615 return (ENOMEM); 616 617 return (0); 618 } 619