xref: /freebsd/sys/kern/kern_thr.c (revision 56a4d1a4cc151cf8356704aafd2d2ec0fd009701)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "opt_ktrace.h"
30 #include "opt_posix.h"
31 #include "opt_hwpmc_hooks.h"
32 
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #ifdef KTRACE
36 #include <sys/ktrace.h>
37 #endif
38 #include <sys/limits.h>
39 #include <sys/lock.h>
40 #include <sys/mutex.h>
41 #include <sys/priv.h>
42 #include <sys/proc.h>
43 #include <sys/posix4.h>
44 #include <sys/ptrace.h>
45 #include <sys/racct.h>
46 #include <sys/resourcevar.h>
47 #include <sys/rtprio.h>
48 #include <sys/rwlock.h>
49 #include <sys/sched.h>
50 #include <sys/sysctl.h>
51 #include <sys/smp.h>
52 #include <sys/syscallsubr.h>
53 #include <sys/sysent.h>
54 #include <sys/sysproto.h>
55 #include <sys/signalvar.h>
56 #include <sys/sysctl.h>
57 #include <sys/thr.h>
58 #include <sys/ucontext.h>
59 #include <sys/umtxvar.h>
60 #ifdef	HWPMC_HOOKS
61 #include <sys/pmckern.h>
62 #endif
63 
64 #include <machine/frame.h>
65 
66 #include <security/audit/audit.h>
67 
/* Parent sysctl node for the tunables below: kern.threads.* */
static SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "thread allocation");

/* Upper bound on threads in one process; enforced in kern_thr_alloc(). */
int max_threads_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
    &max_threads_per_proc, 0, "Limit on threads per proc");

/* Times the limit above was hit; read-only diagnostic counter. */
static int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
    &max_threads_hits, 0, "kern.threads.max_threads_per_proc hit count");
78 
#ifdef COMPAT_FREEBSD32

/*
 * Store a thread id at a user address using the store width matching
 * the process ABI: a full long for 64-bit processes, a 32-bit word
 * for 32-bit compat processes.  Returns 0 on success or -1 on fault,
 * like suword().
 */
static inline int
suword_lwpid(void *addr, lwpid_t lwpid)
{

	return (SV_CURPROC_FLAG(SV_LP64) ?
	    suword(addr, lwpid) : suword32(addr, lwpid));
}

#else
#define suword_lwpid	suword
#endif
96 
97 /*
98  * System call interface.
99  */
100 
/* Arguments passed from sys_thr_create() to thr_create_initthr(). */
struct thr_create_initthr_args {
	ucontext_t ctx;		/* initial machine context for the new thread */
	long *tid;		/* user address for the new tid, may be NULL */
};
105 
106 static int
thr_create_initthr(struct thread * td,void * thunk)107 thr_create_initthr(struct thread *td, void *thunk)
108 {
109 	struct thr_create_initthr_args *args;
110 
111 	/* Copy out the child tid. */
112 	args = thunk;
113 	if (args->tid != NULL && suword_lwpid(args->tid, td->td_tid))
114 		return (EFAULT);
115 
116 	return (set_mcontext(td, &args->ctx.uc_mcontext));
117 }
118 
119 int
sys_thr_create(struct thread * td,struct thr_create_args * uap)120 sys_thr_create(struct thread *td, struct thr_create_args *uap)
121     /* ucontext_t *ctx, long *id, int flags */
122 {
123 	struct thr_create_initthr_args args;
124 	int error;
125 
126 	if ((error = copyin(uap->ctx, &args.ctx, sizeof(args.ctx))))
127 		return (error);
128 	args.tid = uap->id;
129 	return (thread_create(td, NULL, thr_create_initthr, &args));
130 }
131 
132 int
sys_thr_new(struct thread * td,struct thr_new_args * uap)133 sys_thr_new(struct thread *td, struct thr_new_args *uap)
134     /* struct thr_param * */
135 {
136 	struct thr_param param;
137 	int error;
138 
139 	if (uap->param_size < 0 || uap->param_size > sizeof(param))
140 		return (EINVAL);
141 	bzero(&param, sizeof(param));
142 	if ((error = copyin(uap->param, &param, uap->param_size)))
143 		return (error);
144 	return (kern_thr_new(td, &param));
145 }
146 
147 static int
thr_new_initthr(struct thread * td,void * thunk)148 thr_new_initthr(struct thread *td, void *thunk)
149 {
150 	stack_t stack;
151 	struct thr_param *param;
152 	int error;
153 
154 	/*
155 	 * Here we copy out tid to two places, one for child and one
156 	 * for parent, because pthread can create a detached thread,
157 	 * if parent wants to safely access child tid, it has to provide
158 	 * its storage, because child thread may exit quickly and
159 	 * memory is freed before parent thread can access it.
160 	 */
161 	param = thunk;
162 	if ((param->child_tid != NULL &&
163 	    suword_lwpid(param->child_tid, td->td_tid)) ||
164 	    (param->parent_tid != NULL &&
165 	    suword_lwpid(param->parent_tid, td->td_tid)))
166 		return (EFAULT);
167 
168 	/* Set up our machine context. */
169 	stack.ss_sp = param->stack_base;
170 	stack.ss_size = param->stack_size;
171 	/* Set upcall address to user thread entry function. */
172 	error = cpu_set_upcall(td, param->start_func, param->arg, &stack);
173 	if (error != 0)
174 		return (error);
175 	/* Setup user TLS address and TLS pointer register. */
176 	return (cpu_set_user_tls(td, param->tls_base, param->flags));
177 }
178 
179 int
kern_thr_new(struct thread * td,struct thr_param * param)180 kern_thr_new(struct thread *td, struct thr_param *param)
181 {
182 	struct rtprio rtp, *rtpp;
183 	int error;
184 
185 	if ((param->flags & ~(THR_SUSPENDED | THR_SYSTEM_SCOPE |
186 	    THR_C_RUNTIME)) != 0)
187 		return (EINVAL);
188 	rtpp = NULL;
189 	if (param->rtp != 0) {
190 		error = copyin(param->rtp, &rtp, sizeof(struct rtprio));
191 		if (error)
192 			return (error);
193 		rtpp = &rtp;
194 	}
195 #ifdef KTRACE
196 	if (KTRPOINT(td, KTR_STRUCT))
197 		ktrthrparam(param);
198 #endif
199 	return (thread_create(td, rtpp, thr_new_initthr, param));
200 }
201 
/*
 * Common back end for thr_create(2) and thr_new(2): allocate a new
 * thread, link it into the current process, run the caller-supplied
 * initialization callback on it, and make it runnable.
 *
 * rtp, if non-NULL, requests a scheduling class/priority for the new
 * thread; initialize_thread() is invoked with thunk before the thread
 * is linked into the process.  Returns 0 on success or an errno.
 */
int
thread_create(struct thread *td, struct rtprio *rtp,
    int (*initialize_thread)(struct thread *, void *), void *thunk)
{
	struct thread *newtd;
	struct proc *p;
	int error;

	p = td->td_proc;

	/* Validate any requested scheduling parameters up front. */
	if (rtp != NULL) {
		switch(rtp->type) {
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_FIFO:
			/* Only root can set scheduler policy */
			if (priv_check(td, PRIV_SCHED_SETPOLICY) != 0)
				return (EPERM);
			if (rtp->prio > RTP_PRIO_MAX)
				return (EINVAL);
			break;
		case RTP_PRIO_NORMAL:
			rtp->prio = 0;
			break;
		default:
			return (EINVAL);
		}
	}

#ifdef RACCT
	/* Charge the new thread against the process resource limits. */
	if (racct_enable) {
		PROC_LOCK(p);
		error = racct_add(p, RACCT_NTHR, 1);
		PROC_UNLOCK(p);
		if (error != 0)
			return (EPROCLIM);
	}
#endif

	/* Initialize our td */
	error = kern_thr_alloc(p, 0, &newtd);
	if (error)
		goto fail;

	/* Zero the "startzero" span, copy the "startcopy" span from creator. */
	bzero(&newtd->td_startzero,
	    __rangeof(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &newtd->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));
	newtd->td_proc = td->td_proc;
	newtd->td_rb_list = newtd->td_rbp_list = newtd->td_rb_inact = 0;
	thread_cow_get(newtd, td);

	cpu_copy_thread(newtd, td);

	/* Caller-specific setup: machine context, tid copyout, TLS. */
	error = initialize_thread(newtd, thunk);
	if (error != 0) {
		thread_cow_free(newtd);
		thread_free(newtd);
		goto fail;
	}

	PROC_LOCK(p);
	p->p_flag |= P_HADTHREADS;
	thread_link(newtd, p);
	bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));
	thread_lock(td);
	/* let the scheduler know about these things. */
	sched_fork_thread(td, newtd);
	thread_unlock(td);
	/* If the process is stopping, have the new thread suspend too. */
	if (P_SHOULDSTOP(p))
		ast_sched(newtd, TDA_SUSPEND);
	if (p->p_ptevents & PTRACE_LWP)
		newtd->td_dbgflags |= TDB_BORN;

	PROC_UNLOCK(p);
#ifdef	HWPMC_HOOKS
	if (PMC_PROC_IS_USING_PMCS(p))
		PMC_CALL_HOOK(newtd, PMC_FN_THR_CREATE, NULL);
	else if (PMC_SYSTEM_SAMPLING_ACTIVE())
		PMC_CALL_HOOK_UNLOCKED(newtd, PMC_FN_THR_CREATE_LOG, NULL);
#endif

	/* Make the thread findable by tid before it can run. */
	tidhash_add(newtd);

	/* ignore timesharing class */
	if (rtp != NULL && !(td->td_pri_class == PRI_TIMESHARE &&
	    rtp->type == RTP_PRIO_NORMAL))
		rtp_to_pri(rtp, newtd);

	/* sched_add() consumes the thread lock. */
	thread_lock(newtd);
	TD_SET_CAN_RUN(newtd);
	sched_add(newtd, SRQ_BORING);

	return (0);

fail:
#ifdef RACCT
	/* Undo the RACCT_NTHR charge taken above. */
	if (racct_enable) {
		PROC_LOCK(p);
		racct_sub(p, RACCT_NTHR, 1);
		PROC_UNLOCK(p);
	}
#endif
	return (error);
}
306 
307 int
sys_thr_self(struct thread * td,struct thr_self_args * uap)308 sys_thr_self(struct thread *td, struct thr_self_args *uap)
309     /* long *id */
310 {
311 	int error;
312 
313 	error = suword_lwpid(uap->id, (unsigned)td->td_tid);
314 	if (error == -1)
315 		return (EFAULT);
316 	return (0);
317 }
318 
319 int
sys_thr_exit(struct thread * td,struct thr_exit_args * uap)320 sys_thr_exit(struct thread *td, struct thr_exit_args *uap)
321     /* long *state */
322 {
323 
324 	umtx_thread_exit(td);
325 
326 	/* Signal userland that it can free the stack. */
327 	if ((void *)uap->state != NULL) {
328 		(void)suword_lwpid(uap->state, 1);
329 		(void)kern_umtx_wake(td, uap->state, INT_MAX, 0);
330 	}
331 
332 	return (kern_thr_exit(td));
333 }
334 
/*
 * Common exit path for thr_exit(2): terminate the calling thread,
 * unless it is effectively the last one in the process, in which case
 * it returns 0 and the usermode trampoline calls _exit(2) instead.
 */
int
kern_thr_exit(struct thread *td)
{
	struct proc *p;

	p = td->td_proc;

	/*
	 * If all of the threads in a process call this routine to
	 * exit (e.g. all threads call pthread_exit()), exactly one
	 * thread should return to the caller to terminate the process
	 * instead of the thread.
	 *
	 * Checking p_numthreads alone is not sufficient since threads
	 * might be committed to terminating while the PROC_LOCK is
	 * dropped in either ptracestop() or while removing this thread
	 * from the tidhash.  Instead, the p_pendingexits field holds
	 * the count of threads in either of those states and a thread
	 * is considered the "last" thread if all of the other threads
	 * in a process are already terminating.
	 */
	PROC_LOCK(p);
	if (p->p_numthreads == p->p_pendingexits + 1) {
		/*
		 * Ignore attempts to shut down last thread in the
		 * proc.  This will actually call _exit(2) in the
		 * usermode trampoline when it returns.
		 */
		PROC_UNLOCK(p);
		return (0);
	}

	/* Give the ABI layer a chance to clean up per-thread state. */
	if (p->p_sysent->sv_ontdexit != NULL)
		p->p_sysent->sv_ontdexit(td);

	td->td_dbgflags |= TDB_EXIT;
	if (p->p_ptevents & PTRACE_LWP) {
		/*
		 * Count ourselves as committed-to-exit while stopped
		 * for the debugger; see the comment above.
		 */
		p->p_pendingexits++;
		ptracestop(td, SIGTRAP, NULL);
		p->p_pendingexits--;
	}
	tidhash_remove(td);

	/*
	 * The check above should prevent all other threads from this
	 * process from exiting while the PROC_LOCK is dropped, so
	 * there must be at least one other thread other than the
	 * current thread.
	 */
	KASSERT(p->p_numthreads > 1, ("too few threads"));
	racct_sub(p, RACCT_NTHR, 1);
	tdsigcleanup(td);

#ifdef AUDIT
	AUDIT_SYSCALL_EXIT(0, td);
#endif

	/* thread_exit() never returns; it releases the locks itself. */
	PROC_SLOCK(p);
	thread_stopped(p);
	thread_exit();
	/* NOTREACHED */
}
397 
/*
 * thr_kill(2): send a signal to a thread in the current process.
 * id == -1 broadcasts to every thread except the caller; sig == 0
 * only probes for the target's existence without delivering anything.
 */
int
sys_thr_kill(struct thread *td, struct thr_kill_args *uap)
    /* long id, int sig */
{
	ksiginfo_t ksi;
	struct thread *ttd;
	struct proc *p;
	int error;

	p = td->td_proc;
	ksiginfo_init(&ksi);
	ksi.ksi_signo = uap->sig;
	ksi.ksi_code = SI_LWP;
	ksi.ksi_pid = p->p_pid;
	ksi.ksi_uid = td->td_ucred->cr_ruid;
	if (uap->id == -1) {
		if (uap->sig != 0 && !_SIG_VALID(uap->sig)) {
			error = EINVAL;
		} else {
			/* ESRCH unless at least one other thread exists. */
			error = ESRCH;
			PROC_LOCK(p);
			FOREACH_THREAD_IN_PROC(p, ttd) {
				if (ttd != td) {
					error = 0;
					if (uap->sig == 0)
						break;
					tdksignal(ttd, uap->sig, &ksi);
				}
			}
			PROC_UNLOCK(p);
		}
	} else {
		error = 0;
		/* tdfind() returns with the containing process locked. */
		ttd = tdfind((lwpid_t)uap->id, p->p_pid);
		if (ttd == NULL)
			return (ESRCH);
		if (uap->sig == 0)
			;
		else if (!_SIG_VALID(uap->sig))
			error = EINVAL;
		else
			tdksignal(ttd, uap->sig, &ksi);
		PROC_UNLOCK(ttd->td_proc);
	}
	return (error);
}
444 
445 int
sys_thr_kill2(struct thread * td,struct thr_kill2_args * uap)446 sys_thr_kill2(struct thread *td, struct thr_kill2_args *uap)
447     /* pid_t pid, long id, int sig */
448 {
449 	ksiginfo_t ksi;
450 	struct thread *ttd;
451 	struct proc *p;
452 	int error;
453 
454 	AUDIT_ARG_SIGNUM(uap->sig);
455 
456 	ksiginfo_init(&ksi);
457 	ksi.ksi_signo = uap->sig;
458 	ksi.ksi_code = SI_LWP;
459 	ksi.ksi_pid = td->td_proc->p_pid;
460 	ksi.ksi_uid = td->td_ucred->cr_ruid;
461 	if (uap->id == -1) {
462 		if ((p = pfind(uap->pid)) == NULL)
463 			return (ESRCH);
464 		AUDIT_ARG_PROCESS(p);
465 		error = p_cansignal(td, p, uap->sig);
466 		if (error) {
467 			PROC_UNLOCK(p);
468 			return (error);
469 		}
470 		if (uap->sig != 0 && !_SIG_VALID(uap->sig)) {
471 			error = EINVAL;
472 		} else {
473 			error = ESRCH;
474 			FOREACH_THREAD_IN_PROC(p, ttd) {
475 				if (ttd != td) {
476 					error = 0;
477 					if (uap->sig == 0)
478 						break;
479 					tdksignal(ttd, uap->sig, &ksi);
480 				}
481 			}
482 		}
483 		PROC_UNLOCK(p);
484 	} else {
485 		ttd = tdfind((lwpid_t)uap->id, uap->pid);
486 		if (ttd == NULL)
487 			return (ESRCH);
488 		p = ttd->td_proc;
489 		AUDIT_ARG_PROCESS(p);
490 		error = p_cansignal(td, p, uap->sig);
491 		if (uap->sig == 0)
492 			;
493 		else if (!_SIG_VALID(uap->sig))
494 			error = EINVAL;
495 		else
496 			tdksignal(ttd, uap->sig, &ksi);
497 		PROC_UNLOCK(p);
498 	}
499 	return (error);
500 }
501 
502 int
sys_thr_suspend(struct thread * td,struct thr_suspend_args * uap)503 sys_thr_suspend(struct thread *td, struct thr_suspend_args *uap)
504 	/* const struct timespec *timeout */
505 {
506 	struct timespec ts, *tsp;
507 	int error;
508 
509 	tsp = NULL;
510 	if (uap->timeout != NULL) {
511 		error = umtx_copyin_timeout(uap->timeout, &ts);
512 		if (error != 0)
513 			return (error);
514 		tsp = &ts;
515 	}
516 
517 	return (kern_thr_suspend(td, tsp));
518 }
519 
/*
 * Sleep until another thread calls thr_wake(2) on us, the optional
 * timeout expires, or a signal arrives.  tsp == NULL means wait
 * forever; a zero timespec is a poll (reported as ETIMEDOUT).
 */
int
kern_thr_suspend(struct thread *td, struct timespec *tsp)
{
	struct proc *p = td->td_proc;
	struct timeval tv;
	int error = 0;
	int timo = 0;

	/* A self-targeted thr_wake() pre-armed TDP_WAKEUP: consume it. */
	if (td->td_pflags & TDP_WAKEUP) {
		td->td_pflags &= ~TDP_WAKEUP;
		return (0);
	}

	if (tsp != NULL) {
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			error = EWOULDBLOCK;
		else {
			TIMESPEC_TO_TIMEVAL(&tv, tsp);
			timo = tvtohz(&tv);
		}
	}

	PROC_LOCK(p);
	if (error == 0 && (td->td_flags & TDF_THRWAKEUP) == 0)
		error = msleep((void *)td, &p->p_mtx,
			 PCATCH, "lthr", timo);

	/* A wakeup that raced the sleep still counts as success. */
	if (td->td_flags & TDF_THRWAKEUP) {
		thread_lock(td);
		td->td_flags &= ~TDF_THRWAKEUP;
		thread_unlock(td);
		PROC_UNLOCK(p);
		return (0);
	}
	PROC_UNLOCK(p);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	else if (error == ERESTART) {
		/* Don't restart a timed sleep; report EINTR instead. */
		if (timo != 0)
			error = EINTR;
	}
	return (error);
}
563 
/*
 * thr_wake(2): release a thread blocked in kern_thr_suspend().
 * Waking the calling thread itself just arms TDP_WAKEUP, which the
 * next kern_thr_suspend() call consumes.
 */
int
sys_thr_wake(struct thread *td, struct thr_wake_args *uap)
	/* long id */
{
	struct proc *p;
	struct thread *ttd;

	if (uap->id == td->td_tid) {
		td->td_pflags |= TDP_WAKEUP;
		return (0);
	}

	p = td->td_proc;
	/* tdfind() returns with the containing process locked. */
	ttd = tdfind((lwpid_t)uap->id, p->p_pid);
	if (ttd == NULL)
		return (ESRCH);
	thread_lock(ttd);
	ttd->td_flags |= TDF_THRWAKEUP;
	thread_unlock(ttd);
	wakeup((void *)ttd);
	PROC_UNLOCK(p);
	return (0);
}
587 
/*
 * thr_set_name(2): set the debugging name of a thread in the current
 * process.  A NULL name clears it; an overlong name is silently
 * truncated to MAXCOMLEN characters.
 */
int
sys_thr_set_name(struct thread *td, struct thr_set_name_args *uap)
{
	struct proc *p;
	char name[MAXCOMLEN + 1];
	struct thread *ttd;
	int error;

	error = 0;
	name[0] = '\0';
	if (uap->name != NULL) {
		error = copyinstr(uap->name, name, sizeof(name), NULL);
		if (error == ENAMETOOLONG) {
			/* Truncate rather than fail on a long name. */
			error = copyin(uap->name, name, sizeof(name) - 1);
			name[sizeof(name) - 1] = '\0';
		}
		if (error)
			return (error);
	}
	p = td->td_proc;
	/* tdfind() returns with the containing process locked. */
	ttd = tdfind((lwpid_t)uap->id, p->p_pid);
	if (ttd == NULL)
		return (ESRCH);
	/* Bounded: both buffers are MAXCOMLEN + 1 and name is terminated. */
	strcpy(ttd->td_name, name);
#ifdef HWPMC_HOOKS
	if (PMC_PROC_IS_USING_PMCS(p) || PMC_SYSTEM_SAMPLING_ACTIVE())
		PMC_CALL_HOOK_UNLOCKED(ttd, PMC_FN_THR_CREATE_LOG, NULL);
#endif
#ifdef KTR
	/* Invalidate the cached composite tracing name. */
	sched_clear_tdname(ttd);
#endif
	PROC_UNLOCK(p);
	return (error);
}
622 
623 int
kern_thr_alloc(struct proc * p,int pages,struct thread ** ntd)624 kern_thr_alloc(struct proc *p, int pages, struct thread **ntd)
625 {
626 
627 	/* Have race condition but it is cheap. */
628 	if (p->p_numthreads >= max_threads_per_proc) {
629 		++max_threads_hits;
630 		return (EPROCLIM);
631 	}
632 
633 	*ntd = thread_alloc(pages);
634 	if (*ntd == NULL)
635 		return (ENOMEM);
636 
637 	return (0);
638 }
639