1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1994-1996 Søren Schmidt 5 * Copyright (c) 2006 Roman Divacky 6 * Copyright (c) 2013 Dmitry Chagin 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/fcntl.h> 37 #include <sys/imgact.h> 38 #include <sys/kernel.h> 39 #include <sys/ktr.h> 40 #include <sys/lock.h> 41 #include <sys/malloc.h> 42 #include <sys/mutex.h> 43 #include <sys/sx.h> 44 #include <sys/proc.h> 45 #include <sys/resourcevar.h> 46 #include <sys/syscallsubr.h> 47 #include <sys/sysent.h> 48 49 #include <compat/linux/linux_emul.h> 50 #include <compat/linux/linux_mib.h> 51 #include <compat/linux/linux_misc.h> 52 #include <compat/linux/linux_persona.h> 53 #include <compat/linux/linux_util.h> 54 55 #if BYTE_ORDER == LITTLE_ENDIAN 56 #define SHELLMAGIC 0x2123 /* #! */ 57 #else 58 #define SHELLMAGIC 0x2321 59 #endif 60 61 /* 62 * This returns reference to the thread emuldata entry (if found) 63 * 64 * Hold PROC_LOCK when referencing emuldata from other threads. 65 */ 66 struct linux_emuldata * 67 em_find(struct thread *td) 68 { 69 struct linux_emuldata *em; 70 71 em = td->td_emuldata; 72 73 return (em); 74 } 75 76 /* 77 * This returns reference to the proc pemuldata entry (if found) 78 * 79 * Hold PROC_LOCK when referencing proc pemuldata from other threads. 80 * Hold LINUX_PEM_LOCK wher referencing pemuldata members. 81 */ 82 struct linux_pemuldata * 83 pem_find(struct proc *p) 84 { 85 struct linux_pemuldata *pem; 86 87 pem = p->p_emuldata; 88 89 return (pem); 90 } 91 92 /* 93 * Linux apps generally expect the soft open file limit to be set 94 * to 1024, often iterating over all the file descriptors up to that 95 * limit instead of using closefrom(2). Give them what they want, 96 * unless there already is a resource limit in place. 97 */ 98 static void 99 linux_set_default_openfiles(struct thread *td, struct proc *p) 100 { 101 struct rlimit rlim; 102 int error; 103 104 if (linux_default_openfiles < 0) 105 return; 106 107 PROC_LOCK(p); 108 lim_rlimit_proc(p, RLIMIT_NOFILE, &rlim); 109 PROC_UNLOCK(p); 110 if (rlim.rlim_cur != rlim.rlim_max || 111 rlim.rlim_cur <= linux_default_openfiles) 112 return; 113 rlim.rlim_cur = linux_default_openfiles; 114 error = kern_proc_setrlimit(td, p, RLIMIT_NOFILE, &rlim); 115 KASSERT(error == 0, ("kern_proc_setrlimit failed")); 116 } 117 118 /* 119 * The default stack size limit in Linux is 8MB. 120 */ 121 static void 122 linux_set_default_stacksize(struct thread *td, struct proc *p) 123 { 124 struct rlimit rlim; 125 int error; 126 127 if (linux_default_stacksize < 0) 128 return; 129 130 PROC_LOCK(p); 131 lim_rlimit_proc(p, RLIMIT_STACK, &rlim); 132 PROC_UNLOCK(p); 133 if (rlim.rlim_cur != rlim.rlim_max || 134 rlim.rlim_cur <= linux_default_stacksize) 135 return; 136 rlim.rlim_cur = linux_default_stacksize; 137 error = kern_proc_setrlimit(td, p, RLIMIT_STACK, &rlim); 138 KASSERT(error == 0, ("kern_proc_setrlimit failed")); 139 } 140 141 void 142 linux_proc_init(struct thread *td, struct thread *newtd, int flags) 143 { 144 struct linux_emuldata *em; 145 struct linux_pemuldata *pem; 146 struct epoll_emuldata *emd; 147 struct proc *p; 148 149 if (newtd != NULL) { 150 p = newtd->td_proc; 151 152 /* non-exec call */ 153 em = malloc(sizeof(*em), M_TEMP, M_WAITOK | M_ZERO); 154 if (flags & LINUX_CLONE_THREAD) { 155 LINUX_CTR1(proc_init, "thread newtd(%d)", 156 newtd->td_tid); 157 158 em->em_tid = newtd->td_tid; 159 } else { 160 LINUX_CTR1(proc_init, "fork newtd(%d)", p->p_pid); 161 162 em->em_tid = p->p_pid; 163 164 pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO); 165 sx_init(&pem->pem_sx, "lpemlk"); 166 p->p_emuldata = pem; 167 } 168 newtd->td_emuldata = em; 169 170 linux_set_default_openfiles(td, p); 171 linux_set_default_stacksize(td, p); 172 } else { 173 p = td->td_proc; 174 175 /* exec */ 176 LINUX_CTR1(proc_init, "exec newtd(%d)", p->p_pid); 177 178 /* lookup the old one */ 179 em = em_find(td); 180 KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n")); 181 182 em->em_tid = p->p_pid; 183 em->flags = 0; 184 em->robust_futexes = NULL; 185 em->child_clear_tid = NULL; 186 em->child_set_tid = NULL; 187 188 /* epoll should be destroyed in a case of exec. */ 189 pem = pem_find(p); 190 KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n")); 191 pem->persona = 0; 192 if (pem->epoll != NULL) { 193 emd = pem->epoll; 194 pem->epoll = NULL; 195 free(emd, M_EPOLL); 196 } 197 } 198 199 } 200 201 void 202 linux_proc_exit(void *arg __unused, struct proc *p) 203 { 204 struct linux_pemuldata *pem; 205 struct epoll_emuldata *emd; 206 struct thread *td = curthread; 207 208 if (__predict_false(SV_CURPROC_ABI() != SV_ABI_LINUX)) 209 return; 210 211 LINUX_CTR3(proc_exit, "thread(%d) proc(%d) p %p", 212 td->td_tid, p->p_pid, p); 213 214 pem = pem_find(p); 215 if (pem == NULL) 216 return; 217 (p->p_sysent->sv_thread_detach)(td); 218 219 p->p_emuldata = NULL; 220 221 if (pem->epoll != NULL) { 222 emd = pem->epoll; 223 pem->epoll = NULL; 224 free(emd, M_EPOLL); 225 } 226 227 sx_destroy(&pem->pem_sx); 228 free(pem, M_LINUX); 229 } 230 231 /* 232 * If a Linux binary is exec'ing something, try this image activator 233 * first. We override standard shell script execution in order to 234 * be able to modify the interpreter path. We only do this if a Linux 235 * binary is doing the exec, so we do not create an EXEC module for it. 236 */ 237 int 238 linux_exec_imgact_try(struct image_params *imgp) 239 { 240 const char *head = (const char *)imgp->image_header; 241 char *rpath; 242 int error = -1; 243 244 /* 245 * The interpreter for shell scripts run from a Linux binary needs 246 * to be located in /compat/linux if possible in order to recursively 247 * maintain Linux path emulation. 248 */ 249 if (((const short *)head)[0] == SHELLMAGIC) { 250 /* 251 * Run our normal shell image activator. If it succeeds attempt 252 * to use the alternate path for the interpreter. If an 253 * alternate path is found, use our stringspace to store it. 254 */ 255 if ((error = exec_shell_imgact(imgp)) == 0) { 256 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 257 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, 258 AT_FDCWD); 259 if (rpath != NULL) 260 imgp->args->fname_buf = 261 imgp->interpreter_name = rpath; 262 } 263 } 264 return (error); 265 } 266 267 int 268 linux_common_execve(struct thread *td, struct image_args *eargs) 269 { 270 struct linux_pemuldata *pem; 271 struct epoll_emuldata *emd; 272 struct vmspace *oldvmspace; 273 struct linux_emuldata *em; 274 struct proc *p; 275 int error; 276 277 p = td->td_proc; 278 279 error = pre_execve(td, &oldvmspace); 280 if (error != 0) 281 return (error); 282 283 error = kern_execve(td, eargs, NULL, oldvmspace); 284 post_execve(td, error, oldvmspace); 285 if (error != EJUSTRETURN) 286 return (error); 287 288 /* 289 * In a case of transition from Linux binary execing to 290 * FreeBSD binary we destroy Linux emuldata thread & proc entries. 291 */ 292 if (SV_CURPROC_ABI() != SV_ABI_LINUX) { 293 PROC_LOCK(p); 294 em = em_find(td); 295 KASSERT(em != NULL, ("proc_exec: thread emuldata not found.\n")); 296 td->td_emuldata = NULL; 297 298 pem = pem_find(p); 299 KASSERT(pem != NULL, ("proc_exec: proc pemuldata not found.\n")); 300 p->p_emuldata = NULL; 301 PROC_UNLOCK(p); 302 303 if (pem->epoll != NULL) { 304 emd = pem->epoll; 305 pem->epoll = NULL; 306 free(emd, M_EPOLL); 307 } 308 309 free(em, M_TEMP); 310 free(pem, M_LINUX); 311 } 312 return (EJUSTRETURN); 313 } 314 315 void 316 linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp) 317 { 318 struct thread *td; 319 struct thread *othertd; 320 #if defined(__amd64__) 321 struct linux_pemuldata *pem; 322 #endif 323 324 td = curthread; 325 326 /* 327 * In a case of execing to Linux binary we create Linux 328 * emuldata thread entry. 329 */ 330 if (__predict_false((imgp->sysent->sv_flags & SV_ABI_MASK) == 331 SV_ABI_LINUX)) { 332 if (SV_PROC_ABI(p) == SV_ABI_LINUX) { 333 /* 334 * Process already was under Linuxolator 335 * before exec. Update emuldata to reflect 336 * single-threaded cleaned state after exec. 337 */ 338 linux_proc_init(td, NULL, 0); 339 } else { 340 /* 341 * We are switching the process to Linux emulator. 342 */ 343 linux_proc_init(td, td, 0); 344 345 /* 346 * Create a transient td_emuldata for all suspended 347 * threads, so that p->p_sysent->sv_thread_detach() == 348 * linux_thread_detach() can find expected but unused 349 * emuldata. 350 */ 351 FOREACH_THREAD_IN_PROC(td->td_proc, othertd) { 352 if (othertd != td) { 353 linux_proc_init(td, othertd, 354 LINUX_CLONE_THREAD); 355 } 356 } 357 } 358 #if defined(__amd64__) 359 /* 360 * An IA32 executable which has executable stack will have the 361 * READ_IMPLIES_EXEC personality flag set automatically. 362 */ 363 if (SV_PROC_FLAG(td->td_proc, SV_ILP32) && 364 imgp->stack_prot & VM_PROT_EXECUTE) { 365 pem = pem_find(p); 366 pem->persona |= LINUX_READ_IMPLIES_EXEC; 367 } 368 #endif 369 } 370 } 371 372 void 373 linux_thread_dtor(void *arg __unused, struct thread *td) 374 { 375 struct linux_emuldata *em; 376 377 em = em_find(td); 378 if (em == NULL) 379 return; 380 td->td_emuldata = NULL; 381 382 LINUX_CTR1(thread_dtor, "thread(%d)", em->em_tid); 383 384 free(em, M_TEMP); 385 } 386 387 void 388 linux_schedtail(struct thread *td) 389 { 390 struct linux_emuldata *em; 391 struct proc *p; 392 int error = 0; 393 int *child_set_tid; 394 395 p = td->td_proc; 396 397 em = em_find(td); 398 KASSERT(em != NULL, ("linux_schedtail: thread emuldata not found.\n")); 399 child_set_tid = em->child_set_tid; 400 401 if (child_set_tid != NULL) { 402 error = copyout(&em->em_tid, child_set_tid, 403 sizeof(em->em_tid)); 404 LINUX_CTR4(schedtail, "thread(%d) %p stored %d error %d", 405 td->td_tid, child_set_tid, em->em_tid, error); 406 } else 407 LINUX_CTR1(schedtail, "thread(%d)", em->em_tid); 408 } 409