1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <errno.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <strings.h> 30 #include <unistd.h> 31 #include <sys/auxv.h> 32 #include <sys/bitmap.h> 33 #include <sys/brand.h> 34 #include <sys/inttypes.h> 35 #include <sys/lwp.h> 36 #include <sys/syscall.h> 37 #include <sys/systm.h> 38 #include <sys/utsname.h> 39 #include <fcntl.h> 40 #include <brand_misc.h> 41 #include <sys/brand.h> 42 43 extern brand_sysent_table_t brand_sysent_table[]; 44 45 /*LINTED: static unused*/ 46 static volatile int brand_abort_err; 47 /*LINTED: static unused*/ 48 static volatile const char *brand_abort_msg; 49 /*LINTED: static unused*/ 50 static volatile const char *brand_abort_file; 51 /*LINTED: static unused*/ 52 static volatile int brand_abort_line; 53 54 /* 55 * Principles of emulation 101. 56 * 57 * 58 * *** Setting errno 59 * 60 * Just don't do it. This emulation library is loaded onto a 61 * seperate link map from the application who's address space we're 62 * running in. We have our own private copy of libc, so there for, 63 * the errno value accessible from here is is also private and changing 64 * it will not affect any errno value that the processes who's address 65 * space we are running in will see. To return an error condition we 66 * should return the errno value we'd like the system to return. 67 * For more information about this see the comments in brand_misc.h. 68 * Basically, when we return to the caller that initiated the system 69 * call it's their responsibility to set errno. 70 * 71 * 72 * *** Recursion Considerations 73 * 74 * When emulating system calls we need to be very careful about what 75 * library calls we invoke. Library calls should be kept to a minimum. 76 * One issue is that library calls can invoke system calls, so if we're 77 * emulating a system call and we invoke a library call that depends on 78 * that system call we will probably enter a recursive loop, which would 79 * be bad. 80 * 81 * 82 * *** Return Values. 83 * 84 * See brand_misc.h. 85 * 86 * *** Agent lwp considerations 87 * 88 * It is currently impossible to do any emulation for these system call 89 * when they are being invoked on behalf of an agent lwp. To understand why 90 * it's impossible you have to understand how agent lwp syscalls work. 91 * 92 * The agent lwp syscall process works as follows: 93 * 1 The controlling process stops the target. 94 * 2 The controlling process injects an agent lwp which is also stopped. 95 * This agent lwp assumes the userland stack and register values 96 * of another stopped lwp in the current process. 97 * 3 The controlling process configures the agent lwp to start 98 * executing the requested system call. 99 * 4 The controlling process configure /proc to stop the agent lwp when 100 * it enters the requested system call. 101 * 5 The controlling processes allows the agent lwp to start executing. 102 * 6 The agent lwp traps into the kernel to perform the requested system 103 * call and immediately stop. 104 * 7 The controlling process copies all the arguments for the requested 105 * system call onto the agent lwp's stack. 106 * 8 The controlling process configures /proc to stop the agent lwp 107 * when it completes the requested system call. 108 * 9 The controlling processes allows the agent lwp to start executing. 109 * 10 The agent lwp executes the system call and then stop before returning 110 * to userland. 111 * 11 The controlling process copies the return value and return arguments 112 * back from the agent lwps stack. 113 * 12 The controlling process destroys the agent lwp and restarts 114 * the target process. 115 * 116 * The fundamental problem is that when the agent executes the request 117 * system call in step 5, if we're emulating that system call then the 118 * lwp is redirected back to our emulation layer without blocking 119 * in the kernel. But our emulation layer can't access the arguments 120 * for the system call because they haven't been copied to the stack 121 * yet and they still only exist in the controlling processes address 122 * space. This prevents us from being able to do any emulation of 123 * agent lwp system calls. Hence, currently our brand trap interposition 124 * callback (XXX_brand_syscall_callback_common) will detect if a system 125 * call is being made by an agent lwp, and if this is the case it will 126 * never redirect the system call to this emulation library. 127 * 128 * In the future, if this proves to be a problem the the easiest solution 129 * would probably be to replace the branded versions of these application 130 * with their native counterparts. Ie, truss, plimit, and pfiles could be 131 * replace with wrapper scripts that execute the native versions of these 132 * applications. In the case of plimit and pfiles this should be pretty 133 * strait forward. Truss would probably be more tricky since it can 134 * execute applications which would be branded applications, so in that 135 * case it might be necessary to create a loadable library which could 136 * be LD_PRELOADed into truss and this library would interpose on the 137 * exec() system call to allow truss to correctly execute branded 138 * processes. It should be pointed out that this solution could work 139 * because "native agent lwps" (ie, agent lwps created by native 140 * processes) can be treated differently from "branded aged lwps" (ie, 141 * agent lwps created by branded processes), since native agent lwps 142 * would presumably be making native system calls and hence not need 143 * any interposition. 144 * 145 * *** General considerations 146 * 147 * One of the differences between the lx brand and the s10 148 * brand, is that the s10 brand only interposes on syscalls 149 * that need some kind of emulation, whereas the lx brand interposes 150 * on _all_ system calls. Lx branded system calls that don't need 151 * any emulation are then redirected back to the kernel from the 152 * userland library via the IN_KERNEL_SYSCALL macro. The lx-syscall 153 * dtrace provider depends on this behavior. 154 * 155 */ 156 157 /*ARGSUSED*/ 158 void 159 _brand_abort(int err, const char *msg, const char *file, int line) 160 { 161 sysret_t rval; 162 163 /* Save the error message into convenient globals */ 164 brand_abort_err = err; 165 brand_abort_msg = msg; 166 brand_abort_file = file; 167 brand_abort_line = line; 168 169 /* kill ourselves */ 170 abort(); 171 172 /* If abort() didn't work, try something stronger. */ 173 (void) __systemcall(&rval, SYS_lwp_kill + 1024, _lwp_self(), SIGKILL); 174 } 175 176 int 177 brand_uucopy(const void *from, void *to, size_t size) 178 { 179 sysret_t rval; 180 181 if (__systemcall(&rval, SYS_uucopy + 1024, from, to, size) != 0) 182 return (EFAULT); 183 return (0); 184 } 185 186 /* 187 * ATTENTION: uucopystr() does NOT ensure that string are null terminated! 188 */ 189 int 190 brand_uucopystr(const void *from, void *to, size_t size) 191 { 192 sysret_t rval; 193 194 if (__systemcall(&rval, SYS_uucopystr + 1024, from, to, size) != 0) 195 return (EFAULT); 196 return (0); 197 } 198 199 /* 200 * This function is defined to be NOSYS but it won't be called from the 201 * the kernel since the NOSYS system calls are not enabled in the kernel. 202 * Thus, the only time this function is called is directly from within the 203 * indirect system call path. 204 */ 205 /*ARGSUSED*/ 206 long 207 brand_unimpl(sysret_t *rv, uintptr_t p1) 208 { 209 sysret_t rval; 210 211 /* 212 * We'd like to print out some kind of error message here like 213 * "unsupported syscall", but we can't because it's not safe to 214 * assume that stderr or STDERR_FILENO actually points to something 215 * that is a terminal, and if we wrote to those files we could 216 * inadvertantly write to some applications open files, which would 217 * be bad. 218 * 219 * Normally, if an application calls an invalid system call 220 * it get a SIGSYS sent to it. So we'll just go ahead and send 221 * ourselves a signal here. Note that this is far from ideal since 222 * if the application has registered a signal handler, that signal 223 * handler may recieve a ucontext_t as the third parameter to 224 * indicate the context of the process when the signal was 225 * generated, and in this case that context will not be what the 226 * application is expecting. Hence, we should probably create a 227 * brandsys() kernel function that can deliver the signal to us 228 * with the correct ucontext_t. 229 */ 230 (void) __systemcall(&rval, SYS_lwp_kill + 1024, _lwp_self(), SIGSYS); 231 return (ENOSYS); 232 } 233 234 #if defined(__sparc) && !defined(__sparcv9) 235 /* 236 * Yuck. For 32-bit sparc applications, handle indirect system calls. 237 * Note that we declare this interface to use the maximum number of 238 * system call arguments. If we recieve a system call that uses less 239 * arguments, then the additional arguments will be garbage, but they 240 * will also be ignored so that should be ok. 241 */ 242 long 243 brand_indir(sysret_t *rv, int code, 244 uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, 245 uintptr_t a5, uintptr_t a6, uintptr_t a7) 246 { 247 brand_sysent_table_t *sst = &(brand_sysent_table[code]); 248 249 brand_assert(code < NSYSCALL); 250 switch (sst->st_args & NARGS_MASK) { 251 case 0: 252 return ((sst->st_callc)(rv)); 253 case 1: 254 return ((sst->st_callc)(rv, a0)); 255 case 2: 256 return ((sst->st_callc)(rv, a0, a1)); 257 case 3: 258 return ((sst->st_callc)(rv, a0, a1, a2)); 259 case 4: 260 return ((sst->st_callc)(rv, a0, a1, a2, a3)); 261 case 5: 262 return ((sst->st_callc)(rv, a0, a1, a2, a3, a4)); 263 case 6: 264 return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5)); 265 case 7: 266 return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5, a6)); 267 case 8: 268 return ((sst->st_callc)(rv, a0, a1, a2, a3, a4, a5, a6, a7)); 269 } 270 brand_abort(0, "invalid entry in brand_sysent_table"); 271 return (EINVAL); 272 } 273 #endif /* __sparc && !__sparcv9 */ 274 275 /* 276 * Close a libc file handle, but don't actually close the underlying 277 * file descriptor. 278 */ 279 static void 280 brand_close_fh(FILE *file) 281 { 282 int fd, fd_new; 283 284 if (file == NULL) 285 return; 286 287 if ((fd = fileno(file)) < 0) 288 return; 289 290 /* 291 * We're a branded process but our handler isn't installed yet. We 292 * can't use the dup() syscall since it no longer exists. 293 */ 294 fd_new = fcntl(fd, F_DUPFD, 0); 295 if (fd_new == -1) 296 return; 297 298 (void) fclose(file); 299 (void) dup2(fd_new, fd); 300 (void) close(fd_new); 301 } 302 303 /*ARGSUSED*/ 304 void 305 brand_pre_init() 306 { 307 int i; 308 309 /* Sanity check our translation table return value codes */ 310 for (i = 0; i < NSYSCALL; i++) { 311 brand_sysent_table_t *est = &(brand_sysent_table[i]); 312 brand_assert(BIT_ONLYONESET(est->st_args & RV_MASK)); 313 } 314 315 /* 316 * We need to shutdown all libc stdio. libc stdio normally goes to 317 * file descriptors, but since we're actually part of a another 318 * process we don't own these file descriptors and we can't make 319 * any assumptions about their state. 320 */ 321 brand_close_fh(stdin); 322 brand_close_fh(stdout); 323 brand_close_fh(stderr); 324 } 325 326 /*ARGSUSED*/ 327 ulong_t 328 brand_post_init(int version, int argc, char *argv[], char *envp[]) 329 { 330 sysret_t rval; 331 brand_proc_reg_t reg; 332 brand_elf_data_t sed; 333 auxv_t *ap; 334 uintptr_t *p; 335 int err; 336 337 /* 338 * Register our syscall emulation table with the kernel. 339 * Note that we don't have to do invoke (syscall_number + 1024) 340 * until we've actually establised a syscall emulation callback 341 * handler address, which is what we're doing with this brand 342 * syscall. 343 */ 344 reg.sbr_version = version; 345 #ifdef __x86 346 reg.sbr_handler = (caddr_t)brand_handler_table; 347 #else /* !__x86 */ 348 reg.sbr_handler = (caddr_t)brand_handler; 349 #endif /* !__x86 */ 350 351 if ((err = __systemcall(&rval, SYS_brand, B_REGISTER, ®)) != 0) { 352 brand_abort(err, "Failed to brand current process"); 353 354 /*NOTREACHED*/ 355 } 356 357 /* Get data about the executable we're running from the kernel. */ 358 if ((err = __systemcall(&rval, SYS_brand + 1024, 359 B_ELFDATA, (void *)&sed)) != 0) { 360 brand_abort(err, 361 "Failed to get required brand ELF data from the kernel"); 362 /*NOTREACHED*/ 363 } 364 365 /* 366 * Find the aux vector on the stack. 367 */ 368 p = (uintptr_t *)envp; 369 while (*p != NULL) 370 p++; 371 372 /* 373 * p is now pointing at the 0 word after the environ pointers. 374 * After that is the aux vectors. 375 * 376 * The aux vectors are currently pointing to the brand emulation 377 * library and associated linker. We're going to change them to 378 * point to the brand executable and associated linker (or to no 379 * linker for static binaries). This matches the process data 380 * stored within the kernel and visible from /proc, which was 381 * all setup in sn1_elfexec(). We do this so that when a debugger 382 * attaches to the process it sees the process as a normal solaris 383 * process, this brand emulation library and everything on it's 384 * link map will not be visible, unless our librtld_db plugin 385 * is used. Note that this is very different from how Linux 386 * branded processes are implemented within lx branded zones. 387 * In that situation, the primary linkmap of the process is the 388 * brand emulation libraries linkmap, not the Linux applications 389 * linkmap. 390 * 391 * We also need to clear the AF_SUN_NOPLM flag from the AT_SUN_AUXFLAGS 392 * aux vector. This flag told our linker that we don't have a 393 * primary link map. Now that our linker is done initializing, we 394 * want to clear this flag before we transfer control to the 395 * applications copy of the linker, since we want that linker to have 396 * a primary link map which will be the link map for the application 397 * we're running. 398 */ 399 p++; 400 for (ap = (auxv_t *)p; ap->a_type != AT_NULL; ap++) { 401 switch (ap->a_type) { 402 case AT_BASE: 403 /* Hide AT_BASE if static binary */ 404 if (sed.sed_base == NULL) { 405 ap->a_type = AT_IGNORE; 406 ap->a_un.a_val = NULL; 407 } else { 408 ap->a_un.a_val = sed.sed_base; 409 } 410 break; 411 case AT_ENTRY: 412 ap->a_un.a_val = sed.sed_entry; 413 break; 414 case AT_PHDR: 415 ap->a_un.a_val = sed.sed_phdr; 416 break; 417 case AT_PHENT: 418 ap->a_un.a_val = sed.sed_phent; 419 break; 420 case AT_PHNUM: 421 ap->a_un.a_val = sed.sed_phnum; 422 break; 423 case AT_SUN_AUXFLAGS: 424 ap->a_un.a_val &= ~AF_SUN_NOPLM; 425 break; 426 case AT_SUN_EMULATOR: 427 /* 428 * ld.so.1 inspects AT_SUN_EMULATOR to see if 429 * if it is the linker for the brand emulation 430 * library. Hide AT_SUN_EMULATOR, as the 431 * linker we are about to jump to is the linker 432 * for the binary. 433 */ 434 ap->a_type = AT_IGNORE; 435 ap->a_un.a_val = NULL; 436 break; 437 case AT_SUN_LDDATA: 438 /* Hide AT_SUN_LDDATA if static binary */ 439 if (sed.sed_lddata == NULL) { 440 ap->a_type = AT_IGNORE; 441 ap->a_un.a_val = NULL; 442 } else { 443 ap->a_un.a_val = sed.sed_lddata; 444 } 445 break; 446 default: 447 break; 448 } 449 } 450 451 return (sed.sed_ldentry); 452 } 453