1 /*- 2 * Copyright (c) 2006 Roman Divacky 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_compat.h" 33 #include "opt_kdtrace.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/imgact.h> 38 #include <sys/kernel.h> 39 #include <sys/lock.h> 40 #include <sys/malloc.h> 41 #include <sys/mutex.h> 42 #include <sys/sdt.h> 43 #include <sys/sx.h> 44 #include <sys/proc.h> 45 #include <sys/syscallsubr.h> 46 #include <sys/sysent.h> 47 #include <sys/sysproto.h> 48 #include <sys/unistd.h> 49 50 #ifdef COMPAT_LINUX32 51 #include <machine/../linux32/linux.h> 52 #include <machine/../linux32/linux32_proto.h> 53 #else 54 #include <machine/../linux/linux.h> 55 #include <machine/../linux/linux_proto.h> 56 #endif 57 58 #include <compat/linux/linux_dtrace.h> 59 #include <compat/linux/linux_emul.h> 60 #include <compat/linux/linux_futex.h> 61 #include <compat/linux/linux_misc.h> 62 63 /** 64 * Special DTrace provider for the linuxulator. 65 * 66 * In this file we define the provider for the entire linuxulator. All 67 * modules (= files of the linuxulator) use it. 68 * 69 * We define a different name depending on the emulated bitsize, see 70 * ../../<ARCH>/linux{,32}/linux.h, e.g.: 71 * native bitsize = linuxulator 72 * amd64, 32bit emulation = linuxulator32 73 */ 74 LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); 75 76 /** 77 * Special DTrace module "locks", it covers some linuxulator internal 78 * locks. 79 */ 80 LIN_SDT_PROBE_DEFINE1(locks, emul_lock, locked, "struct mtx *"); 81 LIN_SDT_PROBE_DEFINE1(locks, emul_lock, unlock, "struct mtx *"); 82 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, locked, "struct sx *"); 83 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, unlock, "struct sx *"); 84 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, locked, "struct sx *"); 85 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, unlock, "struct sx *"); 86 87 /** 88 * DTrace probes in this module. 89 */ 90 LIN_SDT_PROBE_DEFINE2(emul, em_find, entry, "struct proc *", "int"); 91 LIN_SDT_PROBE_DEFINE0(emul, em_find, return); 92 LIN_SDT_PROBE_DEFINE3(emul, proc_init, entry, "struct thread *", "pid_t", 93 "int"); 94 LIN_SDT_PROBE_DEFINE0(emul, proc_init, create_thread); 95 LIN_SDT_PROBE_DEFINE0(emul, proc_init, fork); 96 LIN_SDT_PROBE_DEFINE0(emul, proc_init, exec); 97 LIN_SDT_PROBE_DEFINE0(emul, proc_init, return); 98 LIN_SDT_PROBE_DEFINE1(emul, proc_exit, entry, "struct proc *"); 99 LIN_SDT_PROBE_DEFINE0(emul, proc_exit, futex_failed); 100 LIN_SDT_PROBE_DEFINE3(emul, proc_exit, reparent, "pid_t", "pid_t", 101 "struct proc *"); 102 LIN_SDT_PROBE_DEFINE1(emul, proc_exit, child_clear_tid_error, "int"); 103 LIN_SDT_PROBE_DEFINE0(emul, proc_exit, return); 104 LIN_SDT_PROBE_DEFINE2(emul, proc_exec, entry, "struct proc *", 105 "struct image_params *"); 106 LIN_SDT_PROBE_DEFINE0(emul, proc_exec, return); 107 LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, entry); 108 LIN_SDT_PROBE_DEFINE1(emul, linux_schedtail, copyout_error, "int"); 109 LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, return); 110 LIN_SDT_PROBE_DEFINE1(emul, linux_set_tid_address, entry, "int *"); 111 LIN_SDT_PROBE_DEFINE0(emul, linux_set_tid_address, return); 112 LIN_SDT_PROBE_DEFINE2(emul, linux_kill_threads, entry, "struct thread *", 113 "int"); 114 LIN_SDT_PROBE_DEFINE1(emul, linux_kill_threads, kill, "pid_t"); 115 LIN_SDT_PROBE_DEFINE0(emul, linux_kill_threads, return); 116 117 struct sx emul_shared_lock; 118 struct mtx emul_lock; 119 120 /* this returns locked reference to the emuldata entry (if found) */ 121 struct linux_emuldata * 122 em_find(struct proc *p, int locked) 123 { 124 struct linux_emuldata *em; 125 126 LIN_SDT_PROBE2(emul, em_find, entry, p, locked); 127 128 if (locked == EMUL_DOLOCK) 129 EMUL_LOCK(&emul_lock); 130 131 em = p->p_emuldata; 132 133 if (em == NULL && locked == EMUL_DOLOCK) 134 EMUL_UNLOCK(&emul_lock); 135 136 LIN_SDT_PROBE1(emul, em_find, return, em); 137 return (em); 138 } 139 140 int 141 linux_proc_init(struct thread *td, pid_t child, int flags) 142 { 143 struct linux_emuldata *em, *p_em; 144 struct proc *p; 145 146 LIN_SDT_PROBE3(emul, proc_init, entry, td, child, flags); 147 148 if (child != 0) { 149 /* fork or create a thread */ 150 em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO); 151 em->pid = child; 152 em->pdeath_signal = 0; 153 em->flags = 0; 154 em->robust_futexes = NULL; 155 if (flags & LINUX_CLONE_THREAD) { 156 /* handled later in the code */ 157 LIN_SDT_PROBE0(emul, proc_init, create_thread); 158 } else { 159 struct linux_emuldata_shared *s; 160 161 LIN_SDT_PROBE0(emul, proc_init, fork); 162 163 s = malloc(sizeof *s, M_LINUX, M_WAITOK | M_ZERO); 164 s->refs = 1; 165 s->group_pid = child; 166 167 LIST_INIT(&s->threads); 168 em->shared = s; 169 } 170 } else { 171 /* exec */ 172 LIN_SDT_PROBE0(emul, proc_init, exec); 173 174 /* lookup the old one */ 175 em = em_find(td->td_proc, EMUL_DOLOCK); 176 KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n")); 177 } 178 179 em->child_clear_tid = NULL; 180 em->child_set_tid = NULL; 181 182 /* 183 * allocate the shared struct only in clone()/fork cases in the case 184 * of clone() td = calling proc and child = pid of the newly created 185 * proc 186 */ 187 if (child != 0) { 188 if (flags & LINUX_CLONE_THREAD) { 189 /* lookup the parent */ 190 /* 191 * we dont have to lock the p_em because 192 * its waiting for us in linux_clone so 193 * there is no chance of it changing the 194 * p_em->shared address 195 */ 196 p_em = em_find(td->td_proc, EMUL_DONTLOCK); 197 KASSERT(p_em != NULL, ("proc_init: parent emuldata not found for CLONE_THREAD\n")); 198 em->shared = p_em->shared; 199 EMUL_SHARED_WLOCK(&emul_shared_lock); 200 em->shared->refs++; 201 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 202 } else { 203 /* 204 * handled earlier to avoid malloc(M_WAITOK) with 205 * rwlock held 206 */ 207 } 208 209 EMUL_SHARED_WLOCK(&emul_shared_lock); 210 LIST_INSERT_HEAD(&em->shared->threads, em, threads); 211 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 212 213 p = pfind(child); 214 KASSERT(p != NULL, ("process not found in proc_init\n")); 215 p->p_emuldata = em; 216 PROC_UNLOCK(p); 217 } else 218 EMUL_UNLOCK(&emul_lock); 219 220 LIN_SDT_PROBE0(emul, proc_init, return); 221 return (0); 222 } 223 224 void 225 linux_proc_exit(void *arg __unused, struct proc *p) 226 { 227 struct linux_emuldata *em; 228 int error, shared_flags, shared_xstat; 229 struct thread *td = FIRST_THREAD_IN_PROC(p); 230 int *child_clear_tid; 231 struct proc *q, *nq; 232 233 if (__predict_true(p->p_sysent != &elf_linux_sysvec)) 234 return; 235 236 LIN_SDT_PROBE1(emul, proc_exit, entry, p); 237 238 release_futexes(p); 239 240 /* find the emuldata */ 241 em = em_find(p, EMUL_DOLOCK); 242 243 KASSERT(em != NULL, ("proc_exit: emuldata not found.\n")); 244 245 /* reparent all procs that are not a thread leader to initproc */ 246 if (em->shared->group_pid != p->p_pid) { 247 LIN_SDT_PROBE3(emul, proc_exit, reparent, 248 em->shared->group_pid, p->p_pid, p); 249 250 child_clear_tid = em->child_clear_tid; 251 EMUL_UNLOCK(&emul_lock); 252 sx_xlock(&proctree_lock); 253 wakeup(initproc); 254 PROC_LOCK(p); 255 proc_reparent(p, initproc); 256 p->p_sigparent = SIGCHLD; 257 PROC_UNLOCK(p); 258 sx_xunlock(&proctree_lock); 259 } else { 260 child_clear_tid = em->child_clear_tid; 261 EMUL_UNLOCK(&emul_lock); 262 } 263 264 EMUL_SHARED_WLOCK(&emul_shared_lock); 265 shared_flags = em->shared->flags; 266 shared_xstat = em->shared->xstat; 267 LIST_REMOVE(em, threads); 268 269 em->shared->refs--; 270 if (em->shared->refs == 0) { 271 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 272 free(em->shared, M_LINUX); 273 } else 274 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 275 276 if ((shared_flags & EMUL_SHARED_HASXSTAT) != 0) 277 p->p_xstat = shared_xstat; 278 279 if (child_clear_tid != NULL) { 280 struct linux_sys_futex_args cup; 281 int null = 0; 282 283 error = copyout(&null, child_clear_tid, sizeof(null)); 284 if (error) { 285 LIN_SDT_PROBE1(emul, proc_exit, 286 child_clear_tid_error, error); 287 288 free(em, M_LINUX); 289 290 LIN_SDT_PROBE0(emul, proc_exit, return); 291 return; 292 } 293 294 /* futexes stuff */ 295 cup.uaddr = child_clear_tid; 296 cup.op = LINUX_FUTEX_WAKE; 297 cup.val = 0x7fffffff; /* Awake everyone */ 298 cup.timeout = NULL; 299 cup.uaddr2 = NULL; 300 cup.val3 = 0; 301 error = linux_sys_futex(FIRST_THREAD_IN_PROC(p), &cup); 302 /* 303 * this cannot happen at the moment and if this happens it 304 * probably means there is a user space bug 305 */ 306 if (error) { 307 LIN_SDT_PROBE0(emul, proc_exit, futex_failed); 308 printf(LMSG("futex stuff in proc_exit failed.\n")); 309 } 310 } 311 312 /* clean the stuff up */ 313 free(em, M_LINUX); 314 315 /* this is a little weird but rewritten from exit1() */ 316 sx_xlock(&proctree_lock); 317 q = LIST_FIRST(&p->p_children); 318 for (; q != NULL; q = nq) { 319 nq = LIST_NEXT(q, p_sibling); 320 if (q->p_flag & P_WEXIT) 321 continue; 322 if (__predict_false(q->p_sysent != &elf_linux_sysvec)) 323 continue; 324 em = em_find(q, EMUL_DOLOCK); 325 KASSERT(em != NULL, ("linux_reparent: emuldata not found: %i\n", q->p_pid)); 326 PROC_LOCK(q); 327 if ((q->p_flag & P_WEXIT) == 0 && em->pdeath_signal != 0) { 328 kern_psignal(q, em->pdeath_signal); 329 } 330 PROC_UNLOCK(q); 331 EMUL_UNLOCK(&emul_lock); 332 } 333 sx_xunlock(&proctree_lock); 334 335 LIN_SDT_PROBE0(emul, proc_exit, return); 336 } 337 338 /* 339 * This is used in a case of transition from FreeBSD binary execing to linux binary 340 * in this case we create linux emuldata proc entry with the pid of the currently running 341 * process. 342 */ 343 void 344 linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp) 345 { 346 if (__predict_false(imgp->sysent == &elf_linux_sysvec)) { 347 LIN_SDT_PROBE2(emul, proc_exec, entry, p, imgp); 348 } 349 if (__predict_false(imgp->sysent == &elf_linux_sysvec 350 && p->p_sysent != &elf_linux_sysvec)) 351 linux_proc_init(FIRST_THREAD_IN_PROC(p), p->p_pid, 0); 352 if (__predict_false((p->p_sysent->sv_flags & SV_ABI_MASK) == 353 SV_ABI_LINUX)) 354 /* Kill threads regardless of imgp->sysent value */ 355 linux_kill_threads(FIRST_THREAD_IN_PROC(p), SIGKILL); 356 if (__predict_false(imgp->sysent != &elf_linux_sysvec 357 && p->p_sysent == &elf_linux_sysvec)) { 358 struct linux_emuldata *em; 359 360 /* 361 * XXX:There's a race because here we assign p->p_emuldata NULL 362 * but the process is still counted as linux one for a short 363 * time so some other process might reference it and try to 364 * access its p->p_emuldata and panicing on a NULL reference. 365 */ 366 em = em_find(p, EMUL_DONTLOCK); 367 368 KASSERT(em != NULL, ("proc_exec: emuldata not found.\n")); 369 370 EMUL_SHARED_WLOCK(&emul_shared_lock); 371 LIST_REMOVE(em, threads); 372 373 PROC_LOCK(p); 374 p->p_emuldata = NULL; 375 PROC_UNLOCK(p); 376 377 em->shared->refs--; 378 if (em->shared->refs == 0) { 379 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 380 free(em->shared, M_LINUX); 381 } else 382 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 383 384 free(em, M_LINUX); 385 } 386 387 if (__predict_false(imgp->sysent == &elf_linux_sysvec)) { 388 LIN_SDT_PROBE0(emul, proc_exec, return); 389 } 390 } 391 392 void 393 linux_schedtail(struct thread *td) 394 { 395 struct linux_emuldata *em; 396 struct proc *p; 397 int error = 0; 398 int *child_set_tid; 399 400 p = td->td_proc; 401 402 LIN_SDT_PROBE1(emul, linux_schedtail, entry, p); 403 404 /* find the emuldata */ 405 em = em_find(p, EMUL_DOLOCK); 406 407 KASSERT(em != NULL, ("linux_schedtail: emuldata not found.\n")); 408 child_set_tid = em->child_set_tid; 409 EMUL_UNLOCK(&emul_lock); 410 411 if (child_set_tid != NULL) { 412 error = copyout(&p->p_pid, (int *)child_set_tid, 413 sizeof(p->p_pid)); 414 415 if (error != 0) { 416 LIN_SDT_PROBE1(emul, linux_schedtail, copyout_error, 417 error); 418 } 419 } 420 421 LIN_SDT_PROBE0(emul, linux_schedtail, return); 422 423 return; 424 } 425 426 int 427 linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args) 428 { 429 struct linux_emuldata *em; 430 431 LIN_SDT_PROBE1(emul, linux_set_tid_address, entry, args->tidptr); 432 433 /* find the emuldata */ 434 em = em_find(td->td_proc, EMUL_DOLOCK); 435 436 KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n")); 437 438 em->child_clear_tid = args->tidptr; 439 td->td_retval[0] = td->td_proc->p_pid; 440 441 EMUL_UNLOCK(&emul_lock); 442 443 LIN_SDT_PROBE0(emul, linux_set_tid_address, return); 444 return 0; 445 } 446 447 void 448 linux_kill_threads(struct thread *td, int sig) 449 { 450 struct linux_emuldata *em, *td_em, *tmp_em; 451 struct proc *sp; 452 453 LIN_SDT_PROBE2(emul, linux_kill_threads, entry, td, sig); 454 455 td_em = em_find(td->td_proc, EMUL_DONTLOCK); 456 457 KASSERT(td_em != NULL, ("linux_kill_threads: emuldata not found.\n")); 458 459 EMUL_SHARED_RLOCK(&emul_shared_lock); 460 LIST_FOREACH_SAFE(em, &td_em->shared->threads, threads, tmp_em) { 461 if (em->pid == td_em->pid) 462 continue; 463 464 sp = pfind(em->pid); 465 if ((sp->p_flag & P_WEXIT) == 0) 466 kern_psignal(sp, sig); 467 PROC_UNLOCK(sp); 468 469 LIN_SDT_PROBE1(emul, linux_kill_threads, kill, em->pid); 470 } 471 EMUL_SHARED_RUNLOCK(&emul_shared_lock); 472 473 LIN_SDT_PROBE0(emul, linux_kill_threads, return); 474 } 475