/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/fasttrap_isa.h>
#include <sys/fasttrap_impl.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <sys/cmn_err.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/segments.h>
#include <sys/x86_archext.h>
#include <sys/sysmacros.h>
#include <sys/trap.h>
#include <sys/archsystm.h>

/*
 * Lossless User-Land Tracing on x86
 * ---------------------------------
 *
 * The execution of most instructions is not dependent on the address; for
 * these instructions it is sufficient to copy them into the user process's
 * address space and execute them. To effectively single-step an instruction
 * in user-land, we copy out the following sequence of instructions to scratch
 * space in the user thread's ulwp_t structure.
 *
 * We then set the program counter (%eip or %rip) to point to this scratch
 * space. Once execution resumes, the original instruction is executed and
 * then control flow is redirected to what was originally the subsequent
 * instruction. If the kernel attempts to deliver a signal while single-
 * stepping, the signal is deferred and the program counter is moved into the
 * second sequence of instructions. The second sequence ends in a trap into
 * the kernel where the deferred signal is then properly handled and delivered.
 *
 * For instructions whose execution is position dependent, we perform simple
 * emulation. These instructions are limited to control transfer
 * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
 * of %rip-relative addressing that means that almost any instruction can be
 * position dependent. For all the details on how we emulate generic
 * instructions, including %rip-relative instructions, see the code in
 * fasttrap_pid_probe() below where we handle instructions of type
 * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
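 *
 * For example, a five-byte jmp <rel32> (opcode e9) encodes its target as
 * an offset from the end of the instruction itself, so copying it into
 * scratch space and executing it there would transfer control to the
 * wrong address; instructions like it must be emulated instead.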
 */

#define	FASTTRAP_MODRM_MOD(modrm)	(((modrm) >> 6) & 0x3)
#define	FASTTRAP_MODRM_REG(modrm)	(((modrm) >> 3) & 0x7)
#define	FASTTRAP_MODRM_RM(modrm)	((modrm) & 0x7)
#define	FASTTRAP_MODRM(mod, reg, rm)	(((mod) << 6) | ((reg) << 3) | (rm))

#define	FASTTRAP_SIB_SCALE(sib)		(((sib) >> 6) & 0x3)
#define	FASTTRAP_SIB_INDEX(sib)		(((sib) >> 3) & 0x7)
#define	FASTTRAP_SIB_BASE(sib)		((sib) & 0x7)

#define	FASTTRAP_REX_W(rex)		(((rex) >> 3) & 1)
#define	FASTTRAP_REX_R(rex)		(((rex) >> 2) & 1)
#define	FASTTRAP_REX_X(rex)		(((rex) >> 1) & 1)
#define	FASTTRAP_REX_B(rex)		((rex) & 1)
#define	FASTTRAP_REX(w, r, x, b)	\
	(0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b))

/*
 * Single-byte op-codes.
 */
#define	FASTTRAP_PUSHL_EBP	0x55

#define	FASTTRAP_JO		0x70
#define	FASTTRAP_JNO		0x71
#define	FASTTRAP_JB		0x72
#define	FASTTRAP_JAE		0x73
#define	FASTTRAP_JE		0x74
#define	FASTTRAP_JNE		0x75
#define	FASTTRAP_JBE		0x76
#define	FASTTRAP_JA		0x77
#define	FASTTRAP_JS		0x78
#define	FASTTRAP_JNS		0x79
#define	FASTTRAP_JP		0x7a
#define	FASTTRAP_JNP		0x7b
#define	FASTTRAP_JL		0x7c
#define	FASTTRAP_JGE		0x7d
#define	FASTTRAP_JLE		0x7e
#define	FASTTRAP_JG		0x7f

#define	FASTTRAP_NOP		0x90

#define	FASTTRAP_MOV_EAX	0xb8
#define	FASTTRAP_MOV_ECX	0xb9

#define	FASTTRAP_RET16		0xc2
#define	FASTTRAP_RET		0xc3

#define	FASTTRAP_LOOPNZ		0xe0
#define	FASTTRAP_LOOPZ		0xe1
#define	FASTTRAP_LOOP		0xe2
#define	FASTTRAP_JCXZ		0xe3

#define	FASTTRAP_CALL		0xe8
#define	FASTTRAP_JMP32		0xe9
#define	FASTTRAP_JMP8		0xeb

#define	FASTTRAP_INT3		0xcc
#define	FASTTRAP_INT		0xcd

#define	FASTTRAP_2_BYTE_OP	0x0f
#define	FASTTRAP_GROUP5_OP	0xff

/*
 * Two-byte op-codes (second byte only).
 */
#define	FASTTRAP_0F_JO		0x80
#define	FASTTRAP_0F_JNO		0x81
#define	FASTTRAP_0F_JB		0x82
#define	FASTTRAP_0F_JAE		0x83
#define	FASTTRAP_0F_JE		0x84
#define	FASTTRAP_0F_JNE		0x85
#define	FASTTRAP_0F_JBE		0x86
#define	FASTTRAP_0F_JA		0x87
#define	FASTTRAP_0F_JS		0x88
#define	FASTTRAP_0F_JNS		0x89
#define	FASTTRAP_0F_JP		0x8a
#define	FASTTRAP_0F_JNP		0x8b
#define	FASTTRAP_0F_JL		0x8c
#define	FASTTRAP_0F_JGE		0x8d
#define	FASTTRAP_0F_JLE		0x8e
#define	FASTTRAP_0F_JG		0x8f

#define	FASTTRAP_EFLAGS_OF	0x800
#define	FASTTRAP_EFLAGS_DF	0x400
#define	FASTTRAP_EFLAGS_SF	0x080
#define	FASTTRAP_EFLAGS_ZF	0x040
#define	FASTTRAP_EFLAGS_AF	0x010
#define	FASTTRAP_EFLAGS_PF	0x004
#define	FASTTRAP_EFLAGS_CF	0x001

/*
 * Instruction prefixes.
 */
#define	FASTTRAP_PREFIX_OPERAND	0x66
#define	FASTTRAP_PREFIX_ADDRESS	0x67
#define	FASTTRAP_PREFIX_CS	0x2E
#define	FASTTRAP_PREFIX_DS	0x3E
#define	FASTTRAP_PREFIX_ES	0x26
#define	FASTTRAP_PREFIX_FS	0x64
#define	FASTTRAP_PREFIX_GS	0x65
#define	FASTTRAP_PREFIX_SS	0x36
#define	FASTTRAP_PREFIX_LOCK	0xF0
#define	FASTTRAP_PREFIX_REP	0xF3
#define	FASTTRAP_PREFIX_REPNE	0xF2

#define	FASTTRAP_NOREG	0xff

/*
 * Map between instruction register encodings and the kernel constants which
 * correspond to indices into struct regs.
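 *
 * For example, encoding 0 names %eax (or %rax), so regmap[0] is EAX on
 * 32-bit kernels and REG_RAX on 64-bit kernels. When a REX extension bit
 * (R, X, or B) applies, it is folded in as bit 3 of the index, so
 * regmap[0 | (1 << 3)] yields REG_R8.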
 */
#ifdef __amd64
static const uint8_t regmap[16] = {
	REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
	REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
};
#else
static const uint8_t regmap[8] = {
	EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI
};
#endif

static ulong_t fasttrap_getreg(struct regs *, uint_t);

static uint64_t
fasttrap_anarg(struct regs *rp, int function_entry, int argno)
{
	uint64_t value;
	int shift = function_entry ? 1 : 0;

#ifdef __amd64
	if (curproc->p_model == DATAMODEL_LP64) {
		uintptr_t *stack;

		/*
		 * In 64-bit mode, the first six arguments are stored in
		 * registers.
		 */
		if (argno < 6)
			return ((&rp->r_rdi)[argno]);

		stack = (uintptr_t *)rp->r_sp;
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fulword(&stack[argno - 6 + shift]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
	} else {
#endif
		uint32_t *stack = (uint32_t *)rp->r_sp;
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fuword32(&stack[argno + shift]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
#ifdef __amd64
	}
#endif

	return (value);
}

/*ARGSUSED*/
int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
    fasttrap_probe_type_t type)
{
	uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10];
	size_t len = FASTTRAP_MAX_INSTR_SIZE;
	size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET));
	uint_t start = 0;
	int rmindex, size;
	uint8_t seg, rex = 0;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set. Since instructions can span
	 * pages, we potentially read the instruction in two parts. If the
	 * second part fails, we just zero out that part of the instruction.
	 */
	if (uread(p, &instr[0], first, pc) != 0)
		return (-1);
	if (len > first &&
	    uread(p, &instr[first], len - first, pc + first) != 0) {
		bzero(&instr[first], len - first);
		len = first;
	}

	/*
	 * If the disassembly fails, then we have a malformed instruction.
	 */
	if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0)
		return (-1);

	/*
	 * Make sure the disassembler isn't completely broken.
	 */
	ASSERT(-1 <= rmindex && rmindex < size);

	/*
	 * If the computed size is greater than the number of bytes read,
	 * then it was a malformed instruction possibly because it fell on a
	 * page boundary and the subsequent page was missing or because of
	 * some malicious user.
	 */
	if (size > len)
		return (-1);

	tp->ftt_size = (uint8_t)size;
	tp->ftt_segment = FASTTRAP_SEG_NONE;

	/*
	 * Find the start of the instruction's opcode by processing any
	 * legacy prefixes.
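	 *
	 * The fall-through cases below count how far a segment-override
	 * byte's case label is from the common prefix handling, so seg ends
	 * up as a small ordinal: 1 if the prefix was %cs, through 6 for
	 * %ss, which is expected to line up with the FASTTRAP_SEG_*
	 * constants recorded in ftt_segment.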
	 */
	for (;;) {
		seg = 0;
		switch (instr[start]) {
		case FASTTRAP_PREFIX_SS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_GS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_FS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_ES:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_DS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_CS:
			seg++;
			/*FALLTHRU*/
		case FASTTRAP_PREFIX_OPERAND:
		case FASTTRAP_PREFIX_ADDRESS:
		case FASTTRAP_PREFIX_LOCK:
		case FASTTRAP_PREFIX_REP:
		case FASTTRAP_PREFIX_REPNE:
			if (seg != 0) {
				/*
				 * It's illegal for an instruction to specify
				 * two segment prefixes -- give up on this
				 * illegal instruction.
				 */
				if (tp->ftt_segment != FASTTRAP_SEG_NONE)
					return (-1);

				tp->ftt_segment = seg;
			}
			start++;
			continue;
		}
		break;
	}

#ifdef __amd64
	/*
	 * Identify the REX prefix on 64-bit processes.
	 */
	if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40)
		rex = instr[start++];
#endif

	/*
	 * Now that we're pretty sure that the instruction is okay, copy the
	 * valid part to the tracepoint.
	 */
	bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE);

	tp->ftt_type = FASTTRAP_T_COMMON;
	if (instr[start] == FASTTRAP_2_BYTE_OP) {
		switch (instr[start + 1]) {
		case FASTTRAP_0F_JO:
		case FASTTRAP_0F_JNO:
		case FASTTRAP_0F_JB:
		case FASTTRAP_0F_JAE:
		case FASTTRAP_0F_JE:
		case FASTTRAP_0F_JNE:
		case FASTTRAP_0F_JBE:
		case FASTTRAP_0F_JA:
		case FASTTRAP_0F_JS:
		case FASTTRAP_0F_JNS:
		case FASTTRAP_0F_JP:
		case FASTTRAP_0F_JNP:
		case FASTTRAP_0F_JL:
		case FASTTRAP_0F_JGE:
		case FASTTRAP_0F_JLE:
		case FASTTRAP_0F_JG:
			tp->ftt_type = FASTTRAP_T_JCC;
			tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO;
			tp->ftt_dest = pc + tp->ftt_size +
			    *(int32_t *)&instr[start + 2];
			break;
		}
	} else if (instr[start] == FASTTRAP_GROUP5_OP) {
		uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]);
		uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]);
		uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]);

		if (reg == 2 || reg == 4) {
			uint_t i, sz;

			if (reg == 2)
				tp->ftt_type = FASTTRAP_T_CALL;
			else
				tp->ftt_type = FASTTRAP_T_JMP;

			if (mod == 3)
				tp->ftt_code = 2;
			else
				tp->ftt_code = 1;

			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);

			/*
			 * See AMD x86-64 Architecture Programmer's Manual
			 * Volume 3, Section 1.2.7, Table 1-12, and
			 * Appendix A.3.1, Table A-15.
			 */
			if (mod != 3 && rm == 4) {
				uint8_t sib = instr[start + 2];
				uint_t index = FASTTRAP_SIB_INDEX(sib);
				uint_t base = FASTTRAP_SIB_BASE(sib);

				tp->ftt_scale = FASTTRAP_SIB_SCALE(sib);

				tp->ftt_index = (index == 4) ?
				    FASTTRAP_NOREG :
				    regmap[index | (FASTTRAP_REX_X(rex) << 3)];
				tp->ftt_base = (mod == 0 && base == 5) ?
				    FASTTRAP_NOREG :
				    regmap[base | (FASTTRAP_REX_B(rex) << 3)];

				i = 3;
				sz = mod == 1 ? 1 : 4;
			} else {
				/*
				 * In 64-bit mode, mod == 0 and r/m == 5
				 * denotes %rip-relative addressing; in 32-bit
				 * mode, the base register isn't used. In both
				 * modes, there is a 32-bit operand.
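				 *
				 * For example, ff 15 d0 00 00 00 decodes as
				 * call *0xd0(%rip) in 64-bit mode but as
				 * call *0xd0 (an absolute disp32 with no
				 * base register) in 32-bit mode.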
				 */
				if (mod == 0 && rm == 5) {
#ifdef __amd64
					if (p->p_model == DATAMODEL_LP64)
						tp->ftt_base = REG_RIP;
					else
#endif
						tp->ftt_base = FASTTRAP_NOREG;
					sz = 4;
				} else {
					uint8_t base = rm |
					    (FASTTRAP_REX_B(rex) << 3);

					tp->ftt_base = regmap[base];
					sz = mod == 1 ? 1 : mod == 2 ? 4 : 0;
				}
				tp->ftt_index = FASTTRAP_NOREG;
				i = 2;
			}

			if (sz == 1)
				tp->ftt_dest = *(int8_t *)&instr[start + i];
			else if (sz == 4)
				tp->ftt_dest = *(int32_t *)&instr[start + i];
			else
				tp->ftt_dest = 0;
		}
	} else {
		switch (instr[start]) {
		case FASTTRAP_RET:
			tp->ftt_type = FASTTRAP_T_RET;
			break;

		case FASTTRAP_RET16:
			tp->ftt_type = FASTTRAP_T_RET16;
			tp->ftt_dest = *(uint16_t *)&instr[start + 1];
			break;

		case FASTTRAP_JO:
		case FASTTRAP_JNO:
		case FASTTRAP_JB:
		case FASTTRAP_JAE:
		case FASTTRAP_JE:
		case FASTTRAP_JNE:
		case FASTTRAP_JBE:
		case FASTTRAP_JA:
		case FASTTRAP_JS:
		case FASTTRAP_JNS:
		case FASTTRAP_JP:
		case FASTTRAP_JNP:
		case FASTTRAP_JL:
		case FASTTRAP_JGE:
		case FASTTRAP_JLE:
		case FASTTRAP_JG:
			tp->ftt_type = FASTTRAP_T_JCC;
			tp->ftt_code = instr[start];
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_LOOPNZ:
		case FASTTRAP_LOOPZ:
		case FASTTRAP_LOOP:
			tp->ftt_type = FASTTRAP_T_LOOP;
			tp->ftt_code = instr[start];
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_JCXZ:
			tp->ftt_type = FASTTRAP_T_JCXZ;
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_CALL:
			tp->ftt_type = FASTTRAP_T_CALL;
			tp->ftt_dest = pc + tp->ftt_size +
			    *(int32_t *)&instr[start + 1];
			tp->ftt_code = 0;
			break;

		case FASTTRAP_JMP32:
			tp->ftt_type = FASTTRAP_T_JMP;
			tp->ftt_dest = pc + tp->ftt_size +
			    *(int32_t *)&instr[start + 1];
			break;

		case FASTTRAP_JMP8:
			tp->ftt_type = FASTTRAP_T_JMP;
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_PUSHL_EBP:
			if (start == 0)
				tp->ftt_type = FASTTRAP_T_PUSHL_EBP;
			break;

		case FASTTRAP_NOP:
#ifdef __amd64
			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);

			/*
			 * On amd64 we have to be careful not to confuse a nop
			 * (actually xchgl %eax, %eax) with an instruction
			 * using the same opcode, but that does something
			 * different (e.g. xchgl %r8d, %eax or
			 * xchgq %r8, %rax).
			 */
			if (FASTTRAP_REX_B(rex) == 0)
#endif
				tp->ftt_type = FASTTRAP_T_NOP;
			break;

		case FASTTRAP_INT3:
			/*
			 * The pid provider shares the int3 trap with debugger
			 * breakpoints so we can't instrument them.
			 */
			ASSERT(instr[start] == FASTTRAP_INSTR);
			return (-1);

		case FASTTRAP_INT:
			/*
			 * Interrupts seem like they could be traced with
			 * no negative implications, but it's possible that
			 * a thread could be redirected by the trap handling
			 * code which would eventually return to the
			 * instruction after the interrupt. If the interrupt
			 * were in our scratch space, the subsequent
			 * instruction might be overwritten before we return.
			 * Accordingly we refuse to instrument any interrupt.
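			 * (Note that int3 is a distinct one-byte encoding,
			 * 0xcc, handled above; the two-byte 0xcd nn form
			 * handled here covers int $n for every vector n.)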
			 */
			return (-1);
		}
	}

#ifdef __amd64
	if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) {
		/*
		 * If the process is 64-bit and the instruction type is still
		 * FASTTRAP_T_COMMON -- meaning we're going to copy it out and
		 * execute it -- we need to watch for %rip-relative
		 * addressing mode. See the portion of fasttrap_pid_probe()
		 * below where we handle tracepoints with type
		 * FASTTRAP_T_COMMON for how we emulate instructions that
		 * employ %rip-relative addressing.
		 */
		if (rmindex != -1) {
			uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]);
			uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]);
			uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]);

			ASSERT(rmindex > start);

			if (mod == 0 && rm == 5) {
				/*
				 * We need to be sure to avoid other
				 * registers used by this instruction. While
				 * the reg field may determine the op code
				 * rather than denoting a register, assuming
				 * that it denotes a register is always safe.
				 * We leave the REX field intact and use
				 * whatever value's there for simplicity.
				 */
				if (reg != 0) {
					tp->ftt_ripmode = FASTTRAP_RIP_1 |
					    (FASTTRAP_RIP_X *
					    FASTTRAP_REX_B(rex));
					rm = 0;
				} else {
					tp->ftt_ripmode = FASTTRAP_RIP_2 |
					    (FASTTRAP_RIP_X *
					    FASTTRAP_REX_B(rex));
					rm = 1;
				}

				tp->ftt_modrm = tp->ftt_instr[rmindex];
				tp->ftt_instr[rmindex] =
				    FASTTRAP_MODRM(2, reg, rm);
			}
		}
	}
#endif

	return (0);
}

int
fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr = FASTTRAP_INSTR;

	if (uwrite(p, &instr, 1, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
{
	uint8_t instr;

	/*
	 * Distinguish between read or write failures and a changed
	 * instruction.
	 */
	if (uread(p, &instr, 1, tp->ftt_pc) != 0)
		return (0);
	if (instr != FASTTRAP_INSTR)
		return (0);
	if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

static uintptr_t
fasttrap_fulword_noerr(const void *uaddr)
{
	uintptr_t ret;

	if (fasttrap_fulword(uaddr, &ret) == 0)
		return (ret);

	return (0);
}

static uint32_t
fasttrap_fuword32_noerr(const void *uaddr)
{
	uint32_t ret;

	if (fasttrap_fuword32(uaddr, &ret) == 0)
		return (ret);

	return (0);
}

static void
fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
    uintptr_t new_pc)
{
	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
	kmutex_t *pid_mtx;

	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    !tp->ftt_proc->ftpc_defunct)
			break;
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
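	 * (The tracepoint can legitimately vanish here if, for example,
	 * another thread disabled the probe between the trap and this
	 * lookup.)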
	 */
	if (tp == NULL) {
		mutex_exit(pid_mtx);
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		/*
		 * If there's a branch that could act as a return site, we
		 * need to trace it, and check here if the program counter is
		 * external to the function.
		 */
		if (tp->ftt_type != FASTTRAP_T_RET &&
		    tp->ftt_type != FASTTRAP_T_RET16 &&
		    new_pc - id->fti_probe->ftp_faddr <
		    id->fti_probe->ftp_fsize)
			continue;

		dtrace_probe(id->fti_probe->ftp_id,
		    pc - id->fti_probe->ftp_faddr,
		    rp->r_r0, rp->r_r1, 0, 0);
	}

	mutex_exit(pid_mtx);
}

static void
fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr)
{
	sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);

	sqp->sq_info.si_signo = SIGSEGV;
	sqp->sq_info.si_code = SEGV_MAPERR;
	sqp->sq_info.si_addr = (caddr_t)addr;

	mutex_enter(&p->p_lock);
	sigaddqa(p, t, sqp);
	mutex_exit(&p->p_lock);

	if (t != NULL)
		aston(t);
}

#ifdef __amd64
static void
fasttrap_usdt_args64(fasttrap_probe_t *probe, struct regs *rp, int argc,
    uintptr_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);
	uintptr_t *stack = (uintptr_t *)rp->r_sp;

	for (i = 0; i < cap; i++) {
		x = probe->ftp_argmap[i];

		if (x < 6)
			argv[i] = (&rp->r_rdi)[x];
		else
			argv[i] = fasttrap_fulword_noerr(&stack[x]);
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}
#endif

static void
fasttrap_usdt_args32(fasttrap_probe_t *probe, struct regs *rp, int argc,
    uint32_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);
	uint32_t *stack = (uint32_t *)rp->r_sp;

	for (i = 0; i < cap; i++) {
		x = probe->ftp_argmap[i];

		argv[i] = fasttrap_fuword32_noerr(&stack[x]);
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}

static int
fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct regs *rp, uintptr_t *addr)
{
	proc_t *p = curproc;
	user_desc_t *desc;
	uint16_t sel, ndx, type;
	uintptr_t limit;

	switch (tp->ftt_segment) {
	case FASTTRAP_SEG_CS:
		sel = rp->r_cs;
		break;
	case FASTTRAP_SEG_DS:
		sel = rp->r_ds;
		break;
	case FASTTRAP_SEG_ES:
		sel = rp->r_es;
		break;
	case FASTTRAP_SEG_FS:
		sel = rp->r_fs;
		break;
	case FASTTRAP_SEG_GS:
		sel = rp->r_gs;
		break;
	case FASTTRAP_SEG_SS:
		sel = rp->r_ss;
		break;
	}

	/*
	 * Make sure the given segment register specifies a user privilege
	 * selector rather than a kernel selector.
	 */
	if (!SELISUPL(sel))
		return (-1);

	ndx = SELTOIDX(sel);

	/*
	 * Check the bounds and grab the descriptor out of the specified
	 * descriptor table.
	 */
	if (SELISLDT(sel)) {
		if (ndx > p->p_ldtlimit)
			return (-1);

		desc = p->p_ldt + ndx;

	} else {
		if (ndx >= NGDT)
			return (-1);

		desc = cpu_get_gdt() + ndx;
	}

	/*
	 * The descriptor must have user privilege level and it must be
	 * present in memory.
	 */
	if (desc->usd_dpl != SEL_UPL || desc->usd_p != 1)
		return (-1);

	type = desc->usd_type;

	/*
	 * If the S bit in the type field is not set, this descriptor can
	 * only be used in system context.
	 */
	if ((type & 0x10) != 0x10)
		return (-1);

	limit = USEGD_GETLIMIT(desc) * (desc->usd_gran ?
	    PAGESIZE : 1);

	if (tp->ftt_segment == FASTTRAP_SEG_CS) {
		/*
		 * The code/data bit and readable bit must both be set.
		 */
		if ((type & 0xa) != 0xa)
			return (-1);

		if (*addr > limit)
			return (-1);
	} else {
		/*
		 * The code/data bit must be clear.
		 */
		if ((type & 0x8) != 0)
			return (-1);

		/*
		 * If the expand-down bit is clear, we just check the limit as
		 * it would naturally be applied. Otherwise, we need to check
		 * that the address is in the range [limit + 1 .. 0xffffffff]
		 * when the default operand size (D/B) bit is set, or
		 * [limit + 1 .. 0xffff] when it is clear.
		 */
		if ((type & 0x4) == 0) {
			if (*addr > limit)
				return (-1);
		} else if (desc->usd_def32) {
			if (*addr < limit + 1 || 0xffffffff < *addr)
				return (-1);
		} else {
			if (*addr < limit + 1 || 0xffff < *addr)
				return (-1);
		}
	}

	*addr += USEGD_GETBASE(desc);

	return (0);
}

int
fasttrap_pid_probe(struct regs *rp)
{
	proc_t *p = curproc;
	uintptr_t pc = rp->r_pc - 1, new_pc = 0;
	fasttrap_bucket_t *bucket;
	kmutex_t *pid_mtx;
	fasttrap_tracepoint_t *tp, tp_local;
	pid_t pid;
	dtrace_icookie_t cookie;
	uint_t is_enabled = 0;

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (curthread->t_dtrace_step) {
		ASSERT(curthread->t_dtrace_on);
		fasttrap_sigtrap(p, curthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	curthread->t_dtrace_ft = 0;
	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;
#ifdef __amd64
	curthread->t_dtrace_regv = 0;
#endif

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	pid = p->p_pid;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Look up the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    !tp->ftt_proc->ftpc_defunct)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
		mutex_exit(pid_mtx);
		return (-1);
	}

	/*
	 * Set the program counter to the address of the traced instruction
	 * so that it looks right in ustack() output.
	 */
	rp->r_pc = pc;

	if (tp->ftt_ids != NULL) {
		fasttrap_id_t *id;

#ifdef __amd64
		if (p->p_model == DATAMODEL_LP64) {
			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
				fasttrap_probe_t *probe = id->fti_probe;

				if (id->fti_ptype == DTFTP_ENTRY) {
					/*
					 * We note that this was an entry
					 * probe to help ustack() find the
					 * first caller.
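					 *
					 * The first five integer argument
					 * registers of the SysV amd64
					 * calling convention (%rdi, %rsi,
					 * %rdx, %rcx, %r8) are passed
					 * through directly as the probe
					 * arguments below.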
					 */
					cookie = dtrace_interrupt_disable();
					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
					dtrace_probe(probe->ftp_id, rp->r_rdi,
					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
					    rp->r_r8);
					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
					dtrace_interrupt_enable(cookie);
				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
					/*
					 * Note that in this case, we don't
					 * call dtrace_probe() since it's only
					 * an artificial probe meant to change
					 * the flow of control so that it
					 * encounters the true probe.
					 */
					is_enabled = 1;
				} else if (probe->ftp_argmap == NULL) {
					dtrace_probe(probe->ftp_id, rp->r_rdi,
					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
					    rp->r_r8);
				} else {
					uintptr_t t[5];

					fasttrap_usdt_args64(probe, rp,
					    sizeof (t) / sizeof (t[0]), t);

					dtrace_probe(probe->ftp_id, t[0], t[1],
					    t[2], t[3], t[4]);
				}
			}
		} else {
#endif
			uintptr_t s0, s1, s2, s3, s4, s5;
			uint32_t *stack = (uint32_t *)rp->r_sp;

			/*
			 * In 32-bit mode, all arguments are passed on the
			 * stack. If this is a function entry probe, we need
			 * to skip the first entry on the stack as it
			 * represents the return address rather than a
			 * parameter to the function.
			 */
			s0 = fasttrap_fuword32_noerr(&stack[0]);
			s1 = fasttrap_fuword32_noerr(&stack[1]);
			s2 = fasttrap_fuword32_noerr(&stack[2]);
			s3 = fasttrap_fuword32_noerr(&stack[3]);
			s4 = fasttrap_fuword32_noerr(&stack[4]);
			s5 = fasttrap_fuword32_noerr(&stack[5]);

			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
				fasttrap_probe_t *probe = id->fti_probe;

				if (id->fti_ptype == DTFTP_ENTRY) {
					/*
					 * We note that this was an entry
					 * probe to help ustack() find the
					 * first caller.
					 */
					cookie = dtrace_interrupt_disable();
					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
					dtrace_probe(probe->ftp_id, s1, s2,
					    s3, s4, s5);
					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
					dtrace_interrupt_enable(cookie);
				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
					/*
					 * Note that in this case, we don't
					 * call dtrace_probe() since it's only
					 * an artificial probe meant to change
					 * the flow of control so that it
					 * encounters the true probe.
					 */
					is_enabled = 1;
				} else if (probe->ftp_argmap == NULL) {
					dtrace_probe(probe->ftp_id, s0, s1,
					    s2, s3, s4);
				} else {
					uint32_t t[5];

					fasttrap_usdt_args32(probe, rp,
					    sizeof (t) / sizeof (t[0]), t);

					dtrace_probe(probe->ftp_id, t[0], t[1],
					    t[2], t[3], t[4]);
				}
			}
#ifdef __amd64
		}
#endif
	}

	/*
	 * We're about to do a bunch of work so we cache a local copy of
	 * the tracepoint to emulate the instruction, and then find the
	 * tracepoint again later if we need to light up any return probes.
	 */
	tp_local = *tp;
	mutex_exit(pid_mtx);
	tp = &tp_local;

	/*
	 * Set the program counter to appear as though the traced instruction
	 * had completely executed. This ensures that fasttrap_getreg() will
	 * report the expected value for REG_RIP.
	 */
	rp->r_pc = pc + tp->ftt_size;

	/*
	 * If there's an is-enabled probe connected to this tracepoint it
	 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
	 * instruction that was placed there by DTrace when the binary was
	 * linked. As this probe is, in fact, enabled, we need to stuff 1
	 * into %eax or %rax.
	 * Accordingly, we can bypass all the instruction
	 * emulation logic since we know the inevitable result. It's possible
	 * that a user could construct a scenario where the 'is-enabled'
	 * probe was on some other instruction, but that would be a rather
	 * exotic way to shoot oneself in the foot.
	 */
	if (is_enabled) {
		rp->r_r0 = 1;
		new_pc = rp->r_pc;
		goto done;
	}

	/*
	 * We emulate certain types of instructions to ensure correctness
	 * (in the case of position dependent instructions) or optimize
	 * common cases. The rest we have the thread execute back in user-
	 * land.
	 */
	switch (tp->ftt_type) {
	case FASTTRAP_T_RET:
	case FASTTRAP_T_RET16:
	{
		uintptr_t dst;
		uintptr_t addr;
		int ret;

		/*
		 * We have to emulate _every_ facet of the behavior of a ret
		 * instruction including what happens if the load from %esp
		 * fails; in that case, we send a SIGSEGV.
		 */
#ifdef __amd64
		if (p->p_model == DATAMODEL_NATIVE) {
#endif
			ret = fasttrap_fulword((void *)rp->r_sp, &dst);
			addr = rp->r_sp + sizeof (uintptr_t);
#ifdef __amd64
		} else {
			uint32_t dst32;
			ret = fasttrap_fuword32((void *)rp->r_sp, &dst32);
			dst = dst32;
			addr = rp->r_sp + sizeof (uint32_t);
		}
#endif

		if (ret == -1) {
			fasttrap_sigsegv(p, curthread, rp->r_sp);
			new_pc = pc;
			break;
		}

		if (tp->ftt_type == FASTTRAP_T_RET16)
			addr += tp->ftt_dest;

		rp->r_sp = addr;
		new_pc = dst;
		break;
	}

	case FASTTRAP_T_JCC:
	{
		uint_t taken;

		switch (tp->ftt_code) {
		case FASTTRAP_JO:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) != 0;
			break;
		case FASTTRAP_JNO:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) == 0;
			break;
		case FASTTRAP_JB:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0;
			break;
		case FASTTRAP_JAE:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0;
			break;
		case FASTTRAP_JE:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0;
			break;
		case FASTTRAP_JNE:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0;
			break;
		case FASTTRAP_JBE:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0 ||
			    (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0;
			break;
		case FASTTRAP_JA:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0 &&
			    (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0;
			break;
		case FASTTRAP_JS:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) != 0;
			break;
		case FASTTRAP_JNS:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) == 0;
			break;
		case FASTTRAP_JP:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) != 0;
			break;
		case FASTTRAP_JNP:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) == 0;
			break;
		case FASTTRAP_JL:
			taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) !=
			    ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
			break;
		case FASTTRAP_JGE:
			taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) ==
			    ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
			break;
		case FASTTRAP_JLE:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 ||
			    ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) !=
			    ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
			break;
		case FASTTRAP_JG:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 &&
			    ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) ==
			    ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
			break;
		}

		if (taken)
			new_pc = tp->ftt_dest;
		else
			new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_LOOP:
	{
		uint_t taken;
		/*
		 * The loop instructions decrement %ecx/%rcx first and then
		 * test the count, so the branch decision below must be made
		 * on the decremented value.
		 */
#ifdef __amd64
		greg_t cx = --rp->r_rcx;
#else
		greg_t cx = --rp->r_ecx;
#endif

		switch (tp->ftt_code) {
		case FASTTRAP_LOOPNZ:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 &&
			    cx != 0;
			break;
		case FASTTRAP_LOOPZ:
			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 &&
			    cx != 0;
			break;
		case FASTTRAP_LOOP:
			taken = (cx != 0);
			break;
		}

		if (taken)
			new_pc = tp->ftt_dest;
		else
			new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_JCXZ:
	{
#ifdef __amd64
		greg_t cx = rp->r_rcx;
#else
		greg_t cx = rp->r_ecx;
#endif

		if (cx == 0)
			new_pc = tp->ftt_dest;
		else
			new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_PUSHL_EBP:
	{
		int ret;
		uintptr_t addr;
#ifdef __amd64
		if (p->p_model == DATAMODEL_NATIVE) {
#endif
			addr = rp->r_sp - sizeof (uintptr_t);
			ret = fasttrap_sulword((void *)addr, rp->r_fp);
#ifdef __amd64
		} else {
			addr = rp->r_sp - sizeof (uint32_t);
			ret = fasttrap_suword32((void *)addr,
			    (uint32_t)rp->r_fp);
		}
#endif

		if (ret == -1) {
			fasttrap_sigsegv(p, curthread, addr);
			new_pc = pc;
			break;
		}

		rp->r_sp = addr;
		new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_NOP:
		new_pc = pc + tp->ftt_size;
		break;

	case FASTTRAP_T_JMP:
	case FASTTRAP_T_CALL:
		if (tp->ftt_code == 0) {
			new_pc = tp->ftt_dest;
		} else {
			uintptr_t value, addr = tp->ftt_dest;

			if (tp->ftt_base != FASTTRAP_NOREG)
				addr += fasttrap_getreg(rp, tp->ftt_base);
			if (tp->ftt_index != FASTTRAP_NOREG)
				addr += fasttrap_getreg(rp, tp->ftt_index) <<
				    tp->ftt_scale;

			if (tp->ftt_code == 1) {
				/*
				 * If there's a segment prefix for this
				 * instruction, we'll need to check permissions
				 * and bounds on the given selector, and adjust
				 * the address accordingly.
				 */
				if (tp->ftt_segment != FASTTRAP_SEG_NONE &&
				    fasttrap_do_seg(tp, rp, &addr) != 0) {
					fasttrap_sigsegv(p, curthread, addr);
					new_pc = pc;
					break;
				}

#ifdef __amd64
				if (p->p_model == DATAMODEL_NATIVE) {
#endif
					if (fasttrap_fulword((void *)addr,
					    &value) == -1) {
						fasttrap_sigsegv(p, curthread,
						    addr);
						new_pc = pc;
						break;
					}
					new_pc = value;
#ifdef __amd64
				} else {
					uint32_t value32;
					addr = (uintptr_t)(uint32_t)addr;
					if (fasttrap_fuword32((void *)addr,
					    &value32) == -1) {
						fasttrap_sigsegv(p, curthread,
						    addr);
						new_pc = pc;
						break;
					}
					new_pc = value32;
				}
#endif
			} else {
				new_pc = addr;
			}
		}

		/*
		 * If this is a call instruction, we need to push the return
		 * address onto the stack. If this fails, we send the process
		 * a SIGSEGV and reset the pc to emulate what would happen if
		 * this instruction weren't traced.
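		 * (The pushed return address is pc + tp->ftt_size, the
		 * address of the instruction following the original call,
		 * exactly what the untraced call would have pushed.)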
		 */
		if (tp->ftt_type == FASTTRAP_T_CALL) {
			int ret;
			uintptr_t addr;
#ifdef __amd64
			if (p->p_model == DATAMODEL_NATIVE) {
				addr = rp->r_sp - sizeof (uintptr_t);
				ret = fasttrap_sulword((void *)addr,
				    pc + tp->ftt_size);
			} else {
#endif
				addr = rp->r_sp - sizeof (uint32_t);
				ret = fasttrap_suword32((void *)addr,
				    (uint32_t)(pc + tp->ftt_size));
#ifdef __amd64
			}
#endif

			if (ret == -1) {
				fasttrap_sigsegv(p, curthread, addr);
				new_pc = pc;
				break;
			}

			rp->r_sp = addr;
		}

		break;

	case FASTTRAP_T_COMMON:
	{
		uintptr_t addr;
#if defined(__amd64)
		/*
		 * Worst case: two copies of the instruction plus the movq,
		 * the jmp, its 8-byte target, and the trailing int.
		 */
		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 10 + 6 + 8 + 2];
#else
		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 5 + 2];
#endif
		uint_t i = 0;
		klwp_t *lwp = ttolwp(curthread);

		/*
		 * Compute the address of the ulwp_t and step over the
		 * ul_self pointer. The method used to store the user-land
		 * thread pointer is very different on 32- and 64-bit
		 * kernels.
		 */
#if defined(__amd64)
		if (p->p_model == DATAMODEL_LP64) {
			addr = lwp->lwp_pcb.pcb_fsbase;
			addr += sizeof (void *);
		} else {
			addr = lwp->lwp_pcb.pcb_gsbase;
			addr += sizeof (caddr32_t);
		}
#elif defined(__i386)
		addr = USEGD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc);
		addr += sizeof (void *);
#endif

		/*
		 * Generic Instruction Tracing
		 * ---------------------------
		 *
		 * This is the layout of the scratch space in the user-land
		 * thread structure for our generated instructions.
		 *
		 *	32-bit mode			bytes
		 *	------------------------	-----
		 * a:	<original instruction>		<= 15
		 *	jmp	<pc + tp->ftt_size>	    5
		 * b:	<original instruction>		<= 15
		 *	int	T_DTRACE_RET		    2
		 *					-----
		 *					<= 37
		 *
		 *	64-bit mode			bytes
		 *	------------------------	-----
		 * a:	<original instruction>		<= 15
		 *	jmp	0(%rip)			    6
		 *	<pc + tp->ftt_size>		    8
		 * b:	<original instruction>		<= 15
		 *	int	T_DTRACE_RET		    2
		 *					-----
		 *					<= 46
		 *
		 * The %pc is set to a, and curthread->t_dtrace_astpc is set
		 * to b. If we encounter a signal on the way out of the
		 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
		 * so that we execute the original instruction and re-enter
		 * the kernel rather than redirecting to the next instruction.
		 *
		 * If there are return probes (so we know that we're going to
		 * need to reenter the kernel after executing the original
		 * instruction), the scratch space will just contain the
		 * original instruction followed by an interrupt -- the same
		 * data as at b.
		 *
		 * %rip-relative Addressing
		 * ------------------------
		 *
		 * There's a further complication in 64-bit mode due to %rip-
		 * relative addressing. While this is clearly a beneficial
		 * architectural decision for position independent code, it's
		 * hard not to see it as a personal attack against the pid
		 * provider since before there was a relatively small set of
		 * instructions to emulate; with %rip-relative addressing,
		 * almost every instruction can potentially depend on the
		 * address at which it's executed. Rather than emulating
		 * the broad spectrum of instructions that can now be
		 * position dependent, we emulate jumps and others as in
		 * 32-bit mode, and take a different tack for instructions
		 * using %rip-relative addressing.
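		 *
		 * For example, mov 0x1234(%rip), %eax is encoded
		 * 8b 05 34 12 00 00; because its reg field already names
		 * %eax (reg == 0), tracepoint initialization rewrote its
		 * ModRM byte to 81, turning it into mov 0x1234(%rcx), %eax.
		 * The code below loads %rcx with the value %rip would have
		 * held and arranges for the original %rcx to be restored,
		 * as the next paragraph describes.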
		 *
		 * For every instruction that uses the ModRM byte, the
		 * in-kernel disassembler reports its location. We use the
		 * ModRM byte to identify that an instruction uses
		 * %rip-relative addressing and to see what other registers
		 * the instruction uses. To emulate those instructions,
		 * we modify the instruction to be %rax-relative rather than
		 * %rip-relative (or %rcx-relative if the instruction uses
		 * %rax; or %r8- or %r9-relative if the REX.B is present so
		 * we don't have to rewrite the REX prefix). We then load
		 * the value that %rip would have been into the scratch
		 * register and generate an instruction to reset the scratch
		 * register back to its original value. The instruction
		 * sequence looks like this:
		 *
		 *	64-bit mode %rip-relative	bytes
		 *	------------------------	-----
		 * a:	<modified instruction>		<= 15
		 *	movq	$<value>, %<scratch>	   10
		 *	jmp	0(%rip)			    6
		 *	<pc + tp->ftt_size>		    8
		 * b:	<modified instruction>		<= 15
		 *	int	T_DTRACE_RET		    2
		 *					-----
		 *					<= 56
		 *
		 * We set curthread->t_dtrace_regv so that upon receiving
		 * a signal we can reset the value of the scratch register.
		 */

		ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE);

		curthread->t_dtrace_scrpc = addr;
		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
		i += tp->ftt_size;

#ifdef __amd64
		if (tp->ftt_ripmode != 0) {
			greg_t *reg;

			ASSERT(p->p_model == DATAMODEL_LP64);
			ASSERT(tp->ftt_ripmode &
			    (FASTTRAP_RIP_1 | FASTTRAP_RIP_2));

			/*
			 * If this was a %rip-relative instruction, we change
			 * it to be either a %rax- or %rcx-relative
			 * instruction (depending on whether those registers
			 * are used as another operand; or %r8- or %r9-
			 * relative depending on the value of REX.B). We then
			 * set that register and generate a movq instruction
			 * to reset the value.
			 */
			if (tp->ftt_ripmode & FASTTRAP_RIP_X)
				scratch[i++] = FASTTRAP_REX(1, 0, 0, 1);
			else
				scratch[i++] = FASTTRAP_REX(1, 0, 0, 0);

			if (tp->ftt_ripmode & FASTTRAP_RIP_1)
				scratch[i++] = FASTTRAP_MOV_EAX;
			else
				scratch[i++] = FASTTRAP_MOV_ECX;

			switch (tp->ftt_ripmode) {
			case FASTTRAP_RIP_1:
				reg = &rp->r_rax;
				curthread->t_dtrace_reg = REG_RAX;
				break;
			case FASTTRAP_RIP_2:
				reg = &rp->r_rcx;
				curthread->t_dtrace_reg = REG_RCX;
				break;
			case FASTTRAP_RIP_1 | FASTTRAP_RIP_X:
				reg = &rp->r_r8;
				curthread->t_dtrace_reg = REG_R8;
				break;
			case FASTTRAP_RIP_2 | FASTTRAP_RIP_X:
				reg = &rp->r_r9;
				curthread->t_dtrace_reg = REG_R9;
				break;
			}

			*(uint64_t *)&scratch[i] = *reg;
			curthread->t_dtrace_regv = *reg;
			*reg = pc + tp->ftt_size;
			i += sizeof (uint64_t);
		}
#endif

		/*
		 * Generate the branch instruction to what would have
		 * normally been the subsequent instruction. In 32-bit mode,
		 * this is just a relative branch; in 64-bit mode this is a
		 * %rip-relative branch that loads the 64-bit pc value
		 * immediately after the jmp instruction.
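		 * (Concretely, the 64-bit branch emitted below is
		 * ff 25 00 00 00 00 -- jmp *0(%rip) -- followed immediately
		 * by the eight-byte target address that the jump loads.)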
		 */
#ifdef __amd64
		if (p->p_model == DATAMODEL_LP64) {
			scratch[i++] = FASTTRAP_GROUP5_OP;
			scratch[i++] = FASTTRAP_MODRM(0, 4, 5);
			*(uint32_t *)&scratch[i] = 0;
			i += sizeof (uint32_t);
			*(uint64_t *)&scratch[i] = pc + tp->ftt_size;
			i += sizeof (uint64_t);
		} else {
#endif
			/*
			 * Set up the jmp to the next instruction; note that
			 * the size of the traced instruction cancels out.
			 */
			scratch[i++] = FASTTRAP_JMP32;
			*(uint32_t *)&scratch[i] = pc - addr - 5;
			i += sizeof (uint32_t);
#ifdef __amd64
		}
#endif

		curthread->t_dtrace_astpc = addr + i;
		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
		i += tp->ftt_size;
		scratch[i++] = FASTTRAP_INT;
		scratch[i++] = T_DTRACE_RET;

		if (fasttrap_copyout(scratch, (char *)addr, i)) {
			fasttrap_sigtrap(p, curthread, pc);
			new_pc = pc;
			break;
		}

		if (tp->ftt_retids != NULL) {
			curthread->t_dtrace_step = 1;
			curthread->t_dtrace_ret = 1;
			new_pc = curthread->t_dtrace_astpc;
		} else {
			new_pc = curthread->t_dtrace_scrpc;
		}

		curthread->t_dtrace_pc = pc;
		curthread->t_dtrace_npc = pc + tp->ftt_size;
		curthread->t_dtrace_on = 1;
		break;
	}

	default:
		panic("fasttrap: mishandled an instruction");
	}

done:
	/*
	 * If there were no return probes when we first found the tracepoint,
	 * we should feel no obligation to honor any return probes that were
	 * subsequently enabled -- they'll just have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until the user thread returns to the kernel.
		 */
		if (tp->ftt_type != FASTTRAP_T_COMMON) {
			/*
			 * Set the program counter to the address of the traced
			 * instruction so that it looks right in ustack()
			 * output. We had previously set it to the end of the
			 * instruction to simplify %rip-relative addressing.
			 */
			rp->r_pc = pc;

			fasttrap_return_common(rp, pc, pid, new_pc);
		} else {
			ASSERT(curthread->t_dtrace_ret != 0);
			ASSERT(curthread->t_dtrace_pc == pc);
			ASSERT(curthread->t_dtrace_scrpc != 0);
			ASSERT(new_pc == curthread->t_dtrace_astpc);
		}
	}

	rp->r_pc = new_pc;

	return (0);
}

int
fasttrap_return_probe(struct regs *rp)
{
	proc_t *p = curproc;
	uintptr_t pc = curthread->t_dtrace_pc;
	uintptr_t npc = curthread->t_dtrace_npc;

	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	/*
	 * We set rp->r_pc to the address of the traced instruction so
	 * that it appears to dtrace_probe() that we're on the original
	 * instruction, and so that the user can't easily detect our
	 * complex web of lies. dtrace_return_probe() (our caller)
	 * will correctly set %pc after we return.
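	 * (npc, captured above from t_dtrace_npc, is the address of the
	 * instruction after the traced one; fasttrap_return_common() uses
	 * it to decide whether this probe site transferred control out of
	 * the function.)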
	 */
	rp->r_pc = pc;

	fasttrap_return_common(rp, pc, p->p_pid, npc);

	return (0);
}

/*ARGSUSED*/
uint64_t
fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 1, argno));
}

/*ARGSUSED*/
uint64_t
fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 0, argno));
}

static ulong_t
fasttrap_getreg(struct regs *rp, uint_t reg)
{
#ifdef __amd64
	switch (reg) {
	case REG_R15:		return (rp->r_r15);
	case REG_R14:		return (rp->r_r14);
	case REG_R13:		return (rp->r_r13);
	case REG_R12:		return (rp->r_r12);
	case REG_R11:		return (rp->r_r11);
	case REG_R10:		return (rp->r_r10);
	case REG_R9:		return (rp->r_r9);
	case REG_R8:		return (rp->r_r8);
	case REG_RDI:		return (rp->r_rdi);
	case REG_RSI:		return (rp->r_rsi);
	case REG_RBP:		return (rp->r_rbp);
	case REG_RBX:		return (rp->r_rbx);
	case REG_RDX:		return (rp->r_rdx);
	case REG_RCX:		return (rp->r_rcx);
	case REG_RAX:		return (rp->r_rax);
	case REG_TRAPNO:	return (rp->r_trapno);
	case REG_ERR:		return (rp->r_err);
	case REG_RIP:		return (rp->r_rip);
	case REG_CS:		return (rp->r_cs);
	case REG_RFL:		return (rp->r_rfl);
	case REG_RSP:		return (rp->r_rsp);
	case REG_SS:		return (rp->r_ss);
	case REG_FS:		return (rp->r_fs);
	case REG_GS:		return (rp->r_gs);
	case REG_DS:		return (rp->r_ds);
	case REG_ES:		return (rp->r_es);
	case REG_FSBASE:	return (rdmsr(MSR_AMD_FSBASE));
	case REG_GSBASE:	return (rdmsr(MSR_AMD_GSBASE));
	}

	panic("dtrace: illegal register constant");
	/*NOTREACHED*/
#else
	if (reg >= _NGREG)
		panic("dtrace: illegal register constant");

	return (((greg_t *)&rp->r_gs)[reg]);
#endif
}