1 /*- 2 * Copyright (c) 2006 Roman Divacky 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_compat.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/imgact.h> 37 #include <sys/kernel.h> 38 #include <sys/lock.h> 39 #include <sys/malloc.h> 40 #include <sys/mutex.h> 41 #include <sys/sdt.h> 42 #include <sys/sx.h> 43 #include <sys/proc.h> 44 #include <sys/syscallsubr.h> 45 #include <sys/sysent.h> 46 #include <sys/sysproto.h> 47 #include <sys/unistd.h> 48 49 #ifdef COMPAT_LINUX32 50 #include <machine/../linux32/linux.h> 51 #include <machine/../linux32/linux32_proto.h> 52 #else 53 #include <machine/../linux/linux.h> 54 #include <machine/../linux/linux_proto.h> 55 #endif 56 57 #include <compat/linux/linux_dtrace.h> 58 #include <compat/linux/linux_emul.h> 59 #include <compat/linux/linux_futex.h> 60 #include <compat/linux/linux_misc.h> 61 62 /** 63 * Special DTrace provider for the linuxulator. 64 * 65 * In this file we define the provider for the entire linuxulator. All 66 * modules (= files of the linuxulator) use it. 67 * 68 * We define a different name depending on the emulated bitsize, see 69 * ../../<ARCH>/linux{,32}/linux.h, e.g.: 70 * native bitsize = linuxulator 71 * amd64, 32bit emulation = linuxulator32 72 */ 73 LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); 74 75 /** 76 * Special DTrace module "locks", it covers some linuxulator internal 77 * locks. 78 */ 79 LIN_SDT_PROBE_DEFINE1(locks, emul_lock, locked, "struct mtx *"); 80 LIN_SDT_PROBE_DEFINE1(locks, emul_lock, unlock, "struct mtx *"); 81 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, locked, "struct sx *"); 82 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, unlock, "struct sx *"); 83 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, locked, "struct sx *"); 84 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, unlock, "struct sx *"); 85 86 /** 87 * DTrace probes in this module. 88 */ 89 LIN_SDT_PROBE_DEFINE2(emul, em_find, entry, "struct proc *", "int"); 90 LIN_SDT_PROBE_DEFINE0(emul, em_find, return); 91 LIN_SDT_PROBE_DEFINE3(emul, proc_init, entry, "struct thread *", "pid_t", 92 "int"); 93 LIN_SDT_PROBE_DEFINE0(emul, proc_init, create_thread); 94 LIN_SDT_PROBE_DEFINE0(emul, proc_init, fork); 95 LIN_SDT_PROBE_DEFINE0(emul, proc_init, exec); 96 LIN_SDT_PROBE_DEFINE0(emul, proc_init, return); 97 LIN_SDT_PROBE_DEFINE1(emul, proc_exit, entry, "struct proc *"); 98 LIN_SDT_PROBE_DEFINE0(emul, proc_exit, futex_failed); 99 LIN_SDT_PROBE_DEFINE3(emul, proc_exit, reparent, "pid_t", "pid_t", 100 "struct proc *"); 101 LIN_SDT_PROBE_DEFINE1(emul, proc_exit, child_clear_tid_error, "int"); 102 LIN_SDT_PROBE_DEFINE0(emul, proc_exit, return); 103 LIN_SDT_PROBE_DEFINE2(emul, proc_exec, entry, "struct proc *", 104 "struct image_params *"); 105 LIN_SDT_PROBE_DEFINE0(emul, proc_exec, return); 106 LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, entry); 107 LIN_SDT_PROBE_DEFINE1(emul, linux_schedtail, copyout_error, "int"); 108 LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, return); 109 LIN_SDT_PROBE_DEFINE1(emul, linux_set_tid_address, entry, "int *"); 110 LIN_SDT_PROBE_DEFINE0(emul, linux_set_tid_address, return); 111 LIN_SDT_PROBE_DEFINE2(emul, linux_kill_threads, entry, "struct thread *", 112 "int"); 113 LIN_SDT_PROBE_DEFINE1(emul, linux_kill_threads, kill, "pid_t"); 114 LIN_SDT_PROBE_DEFINE0(emul, linux_kill_threads, return); 115 116 struct sx emul_shared_lock; 117 struct mtx emul_lock; 118 119 /* this returns locked reference to the emuldata entry (if found) */ 120 struct linux_emuldata * 121 em_find(struct proc *p, int locked) 122 { 123 struct linux_emuldata *em; 124 125 LIN_SDT_PROBE2(emul, em_find, entry, p, locked); 126 127 if (locked == EMUL_DOLOCK) 128 EMUL_LOCK(&emul_lock); 129 130 em = p->p_emuldata; 131 132 if (em == NULL && locked == EMUL_DOLOCK) 133 EMUL_UNLOCK(&emul_lock); 134 135 LIN_SDT_PROBE1(emul, em_find, return, em); 136 return (em); 137 } 138 139 int 140 linux_proc_init(struct thread *td, pid_t child, int flags) 141 { 142 struct linux_emuldata *em, *p_em; 143 struct proc *p; 144 145 LIN_SDT_PROBE3(emul, proc_init, entry, td, child, flags); 146 147 if (child != 0) { 148 /* fork or create a thread */ 149 em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO); 150 em->pid = child; 151 em->pdeath_signal = 0; 152 em->flags = 0; 153 em->robust_futexes = NULL; 154 if (flags & LINUX_CLONE_THREAD) { 155 /* handled later in the code */ 156 LIN_SDT_PROBE0(emul, proc_init, create_thread); 157 } else { 158 struct linux_emuldata_shared *s; 159 160 LIN_SDT_PROBE0(emul, proc_init, fork); 161 162 s = malloc(sizeof *s, M_LINUX, M_WAITOK | M_ZERO); 163 s->refs = 1; 164 s->group_pid = child; 165 166 LIST_INIT(&s->threads); 167 em->shared = s; 168 } 169 } else { 170 /* exec */ 171 LIN_SDT_PROBE0(emul, proc_init, exec); 172 173 /* lookup the old one */ 174 em = em_find(td->td_proc, EMUL_DOLOCK); 175 KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n")); 176 } 177 178 em->child_clear_tid = NULL; 179 em->child_set_tid = NULL; 180 181 /* 182 * allocate the shared struct only in clone()/fork cases in the case 183 * of clone() td = calling proc and child = pid of the newly created 184 * proc 185 */ 186 if (child != 0) { 187 if (flags & LINUX_CLONE_THREAD) { 188 /* lookup the parent */ 189 /* 190 * we dont have to lock the p_em because 191 * its waiting for us in linux_clone so 192 * there is no chance of it changing the 193 * p_em->shared address 194 */ 195 p_em = em_find(td->td_proc, EMUL_DONTLOCK); 196 KASSERT(p_em != NULL, ("proc_init: parent emuldata not found for CLONE_THREAD\n")); 197 em->shared = p_em->shared; 198 EMUL_SHARED_WLOCK(&emul_shared_lock); 199 em->shared->refs++; 200 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 201 } else { 202 /* 203 * handled earlier to avoid malloc(M_WAITOK) with 204 * rwlock held 205 */ 206 } 207 208 EMUL_SHARED_WLOCK(&emul_shared_lock); 209 LIST_INSERT_HEAD(&em->shared->threads, em, threads); 210 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 211 212 p = pfind(child); 213 KASSERT(p != NULL, ("process not found in proc_init\n")); 214 p->p_emuldata = em; 215 PROC_UNLOCK(p); 216 } else 217 EMUL_UNLOCK(&emul_lock); 218 219 LIN_SDT_PROBE0(emul, proc_init, return); 220 return (0); 221 } 222 223 void 224 linux_proc_exit(void *arg __unused, struct proc *p) 225 { 226 struct linux_emuldata *em; 227 int error, shared_flags, shared_xstat; 228 struct thread *td = FIRST_THREAD_IN_PROC(p); 229 int *child_clear_tid; 230 struct proc *q, *nq; 231 232 if (__predict_true(p->p_sysent != &elf_linux_sysvec)) 233 return; 234 235 LIN_SDT_PROBE1(emul, proc_exit, entry, p); 236 237 release_futexes(p); 238 239 /* find the emuldata */ 240 em = em_find(p, EMUL_DOLOCK); 241 242 KASSERT(em != NULL, ("proc_exit: emuldata not found.\n")); 243 244 /* reparent all procs that are not a thread leader to initproc */ 245 if (em->shared->group_pid != p->p_pid) { 246 LIN_SDT_PROBE3(emul, proc_exit, reparent, 247 em->shared->group_pid, p->p_pid, p); 248 249 child_clear_tid = em->child_clear_tid; 250 EMUL_UNLOCK(&emul_lock); 251 sx_xlock(&proctree_lock); 252 wakeup(initproc); 253 PROC_LOCK(p); 254 proc_reparent(p, initproc); 255 p->p_sigparent = SIGCHLD; 256 PROC_UNLOCK(p); 257 sx_xunlock(&proctree_lock); 258 } else { 259 child_clear_tid = em->child_clear_tid; 260 EMUL_UNLOCK(&emul_lock); 261 } 262 263 EMUL_SHARED_WLOCK(&emul_shared_lock); 264 shared_flags = em->shared->flags; 265 shared_xstat = em->shared->xstat; 266 LIST_REMOVE(em, threads); 267 268 em->shared->refs--; 269 if (em->shared->refs == 0) { 270 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 271 free(em->shared, M_LINUX); 272 } else 273 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 274 275 if ((shared_flags & EMUL_SHARED_HASXSTAT) != 0) 276 p->p_xstat = shared_xstat; 277 278 if (child_clear_tid != NULL) { 279 struct linux_sys_futex_args cup; 280 int null = 0; 281 282 error = copyout(&null, child_clear_tid, sizeof(null)); 283 if (error) { 284 LIN_SDT_PROBE1(emul, proc_exit, 285 child_clear_tid_error, error); 286 287 free(em, M_LINUX); 288 289 LIN_SDT_PROBE0(emul, proc_exit, return); 290 return; 291 } 292 293 /* futexes stuff */ 294 cup.uaddr = child_clear_tid; 295 cup.op = LINUX_FUTEX_WAKE; 296 cup.val = 0x7fffffff; /* Awake everyone */ 297 cup.timeout = NULL; 298 cup.uaddr2 = NULL; 299 cup.val3 = 0; 300 error = linux_sys_futex(FIRST_THREAD_IN_PROC(p), &cup); 301 /* 302 * this cannot happen at the moment and if this happens it 303 * probably means there is a user space bug 304 */ 305 if (error) { 306 LIN_SDT_PROBE0(emul, proc_exit, futex_failed); 307 printf(LMSG("futex stuff in proc_exit failed.\n")); 308 } 309 } 310 311 /* clean the stuff up */ 312 free(em, M_LINUX); 313 314 /* this is a little weird but rewritten from exit1() */ 315 sx_xlock(&proctree_lock); 316 q = LIST_FIRST(&p->p_children); 317 for (; q != NULL; q = nq) { 318 nq = LIST_NEXT(q, p_sibling); 319 if (q->p_flag & P_WEXIT) 320 continue; 321 if (__predict_false(q->p_sysent != &elf_linux_sysvec)) 322 continue; 323 em = em_find(q, EMUL_DOLOCK); 324 KASSERT(em != NULL, ("linux_reparent: emuldata not found: %i\n", q->p_pid)); 325 PROC_LOCK(q); 326 if ((q->p_flag & P_WEXIT) == 0 && em->pdeath_signal != 0) { 327 kern_psignal(q, em->pdeath_signal); 328 } 329 PROC_UNLOCK(q); 330 EMUL_UNLOCK(&emul_lock); 331 } 332 sx_xunlock(&proctree_lock); 333 334 LIN_SDT_PROBE0(emul, proc_exit, return); 335 } 336 337 /* 338 * This is used in a case of transition from FreeBSD binary execing to linux binary 339 * in this case we create linux emuldata proc entry with the pid of the currently running 340 * process. 341 */ 342 void 343 linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp) 344 { 345 if (__predict_false(imgp->sysent == &elf_linux_sysvec)) { 346 LIN_SDT_PROBE2(emul, proc_exec, entry, p, imgp); 347 } 348 if (__predict_false(imgp->sysent == &elf_linux_sysvec 349 && p->p_sysent != &elf_linux_sysvec)) 350 linux_proc_init(FIRST_THREAD_IN_PROC(p), p->p_pid, 0); 351 if (__predict_false((p->p_sysent->sv_flags & SV_ABI_MASK) == 352 SV_ABI_LINUX)) 353 /* Kill threads regardless of imgp->sysent value */ 354 linux_kill_threads(FIRST_THREAD_IN_PROC(p), SIGKILL); 355 if (__predict_false(imgp->sysent != &elf_linux_sysvec 356 && p->p_sysent == &elf_linux_sysvec)) { 357 struct linux_emuldata *em; 358 359 /* 360 * XXX:There's a race because here we assign p->p_emuldata NULL 361 * but the process is still counted as linux one for a short 362 * time so some other process might reference it and try to 363 * access its p->p_emuldata and panicing on a NULL reference. 364 */ 365 em = em_find(p, EMUL_DONTLOCK); 366 367 KASSERT(em != NULL, ("proc_exec: emuldata not found.\n")); 368 369 EMUL_SHARED_WLOCK(&emul_shared_lock); 370 LIST_REMOVE(em, threads); 371 372 PROC_LOCK(p); 373 p->p_emuldata = NULL; 374 PROC_UNLOCK(p); 375 376 em->shared->refs--; 377 if (em->shared->refs == 0) { 378 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 379 free(em->shared, M_LINUX); 380 } else 381 EMUL_SHARED_WUNLOCK(&emul_shared_lock); 382 383 free(em, M_LINUX); 384 } 385 386 if (__predict_false(imgp->sysent == &elf_linux_sysvec)) { 387 LIN_SDT_PROBE0(emul, proc_exec, return); 388 } 389 } 390 391 void 392 linux_schedtail(struct thread *td) 393 { 394 struct linux_emuldata *em; 395 struct proc *p; 396 int error = 0; 397 int *child_set_tid; 398 399 p = td->td_proc; 400 401 LIN_SDT_PROBE1(emul, linux_schedtail, entry, p); 402 403 /* find the emuldata */ 404 em = em_find(p, EMUL_DOLOCK); 405 406 KASSERT(em != NULL, ("linux_schedtail: emuldata not found.\n")); 407 child_set_tid = em->child_set_tid; 408 EMUL_UNLOCK(&emul_lock); 409 410 if (child_set_tid != NULL) { 411 error = copyout(&p->p_pid, (int *)child_set_tid, 412 sizeof(p->p_pid)); 413 414 if (error != 0) { 415 LIN_SDT_PROBE1(emul, linux_schedtail, copyout_error, 416 error); 417 } 418 } 419 420 LIN_SDT_PROBE0(emul, linux_schedtail, return); 421 422 return; 423 } 424 425 int 426 linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args) 427 { 428 struct linux_emuldata *em; 429 430 LIN_SDT_PROBE1(emul, linux_set_tid_address, entry, args->tidptr); 431 432 /* find the emuldata */ 433 em = em_find(td->td_proc, EMUL_DOLOCK); 434 435 KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n")); 436 437 em->child_clear_tid = args->tidptr; 438 td->td_retval[0] = td->td_proc->p_pid; 439 440 EMUL_UNLOCK(&emul_lock); 441 442 LIN_SDT_PROBE0(emul, linux_set_tid_address, return); 443 return 0; 444 } 445 446 void 447 linux_kill_threads(struct thread *td, int sig) 448 { 449 struct linux_emuldata *em, *td_em, *tmp_em; 450 struct proc *sp; 451 452 LIN_SDT_PROBE2(emul, linux_kill_threads, entry, td, sig); 453 454 td_em = em_find(td->td_proc, EMUL_DONTLOCK); 455 456 KASSERT(td_em != NULL, ("linux_kill_threads: emuldata not found.\n")); 457 458 EMUL_SHARED_RLOCK(&emul_shared_lock); 459 LIST_FOREACH_SAFE(em, &td_em->shared->threads, threads, tmp_em) { 460 if (em->pid == td_em->pid) 461 continue; 462 463 sp = pfind(em->pid); 464 if ((sp->p_flag & P_WEXIT) == 0) 465 kern_psignal(sp, sig); 466 PROC_UNLOCK(sp); 467 468 LIN_SDT_PROBE1(emul, linux_kill_threads, kill, em->pid); 469 } 470 EMUL_SHARED_RUNLOCK(&emul_shared_lock); 471 472 LIN_SDT_PROBE0(emul, linux_kill_threads, return); 473 } 474