/*
 * SPDX-License-Identifier: CDDL 1.0
 *
 * Copyright 2022 Christos Margiolis <christos@FreeBSD.org>
 * Copyright 2022 Mark Johnston <markj@FreeBSD.org>
 */

#include <sys/param.h>

#include <machine/cpufunc.h>
#include <machine/md_var.h>

#include <sys/dtrace.h>
#include <cddl/dev/dtrace/dtrace_cddl.h>
#include <dis_tables.h>

#include "kinst.h"

#define	KINST_PUSHL_RBP		0x55
#define	KINST_STI		0xfb
#define	KINST_POPF		0x9d

#define	KINST_MODRM_MOD(b)	(((b) & 0xc0) >> 6)
#define	KINST_MODRM_REG(b)	(((b) & 0x38) >> 3)
#define	KINST_MODRM_RM(b)	((b) & 0x07)

#define	KINST_SIB_SCALE(s)	(((s) & 0xc0) >> 6)
#define	KINST_SIB_INDEX(s)	(((s) & 0x38) >> 3)
#define	KINST_SIB_BASE(s)	(((s) & 0x07) >> 0)

#define	KINST_REX_W(r)		(((r) & 0x08) >> 3)
#define	KINST_REX_R(r)		(((r) & 0x04) >> 2)
#define	KINST_REX_X(r)		(((r) & 0x02) >> 1)
#define	KINST_REX_B(r)		(((r) & 0x01) >> 0)

#define	KINST_F_CALL		0x0001	/* instruction is a "call" */
#define	KINST_F_DIRECT_CALL	0x0002	/* instruction is a direct call */
#define	KINST_F_RIPREL		0x0004	/* instruction is position-dependent */
#define	KINST_F_JMP		0x0008	/* instruction is a %rip-relative jmp */
#define	KINST_F_MOD_DIRECT	0x0010	/* operand is not a memory address */

/*
 * Map ModR/M register bits to a trapframe offset.
 */
static int
kinst_regoff(int reg)
{
#define	_MATCH_REG(i, reg)					\
	case i:							\
		return (offsetof(struct trapframe, tf_ ## reg) / \
		    sizeof(register_t))
	switch (reg) {
	_MATCH_REG( 0, rax);
	_MATCH_REG( 1, rcx);
	_MATCH_REG( 2, rdx);
	_MATCH_REG( 3, rbx);
	_MATCH_REG( 4, rsp); /* SIB when mod != 3 */
	_MATCH_REG( 5, rbp);
	_MATCH_REG( 6, rsi);
	_MATCH_REG( 7, rdi);
	_MATCH_REG( 8, r8); /* REX.R is set */
	_MATCH_REG( 9, r9);
	_MATCH_REG(10, r10);
	_MATCH_REG(11, r11);
	_MATCH_REG(12, r12);
	_MATCH_REG(13, r13);
	_MATCH_REG(14, r14);
	_MATCH_REG(15, r15);
	}
#undef _MATCH_REG
	panic("%s: unhandled register index %d", __func__, reg);
}

/*
 * Obtain the specified register's value.
 */
static uint64_t
kinst_regval(struct trapframe *frame, int reg)
{
	if (reg == -1)
		return (0);
	return (((register_t *)frame)[kinst_regoff(reg)]);
}

/*
 * Compute the rebased displacement of a %rip-relative operand for the copy of
 * the instruction residing at 'dst'.
 */
static uint32_t
kinst_riprel_disp(struct kinst_probe *kp, void *dst)
{
	return ((uint32_t)((intptr_t)kp->kp_patchpoint + kp->kp_md.disp -
	    (intptr_t)dst));
}

static void
kinst_trampoline_populate(struct kinst_probe *kp, uint8_t *tramp)
{
	uint8_t *instr;
	uint32_t disp;
	int ilen;

	ilen = kp->kp_md.tinstlen;

	memcpy(tramp, kp->kp_md.template, ilen);
	if ((kp->kp_md.flags & KINST_F_RIPREL) != 0) {
		disp = kinst_riprel_disp(kp, tramp);
		memcpy(&tramp[kp->kp_md.dispoff], &disp, sizeof(uint32_t));
	}

	/*
	 * The following position-independent jmp takes us back to the
	 * original code.  It is encoded as "jmp *0(%rip)" (six bytes),
	 * followed by the absolute address of the instruction following
	 * the one that was traced (eight bytes).
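	 *
	 * As an illustration, with a copied instruction of three bytes the
	 * trampoline ends up laid out as follows:
	 *
	 *   tramp[0..2]	copied (possibly rewritten) instruction
	 *   tramp[3..8]	ff 25 00 00 00 00	jmp *0(%rip)
	 *   tramp[9..16]	address of the instruction after the traced one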
	 */
	tramp[ilen + 0] = 0xff;
	tramp[ilen + 1] = 0x25;
	tramp[ilen + 2] = 0x00;
	tramp[ilen + 3] = 0x00;
	tramp[ilen + 4] = 0x00;
	tramp[ilen + 5] = 0x00;
	instr = kp->kp_patchpoint + kp->kp_md.instlen;
	memcpy(&tramp[ilen + 6], &instr, sizeof(uintptr_t));
}

int
kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch)
{
	solaris_cpu_t *cpu;
	uintptr_t *stack, retaddr;
	struct kinst_probe *kp;
	struct kinst_probe_md *kpmd;
	uint8_t *tramp;

	stack = (uintptr_t *)frame->tf_rsp;
	cpu = &solaris_cpu[curcpu];

	LIST_FOREACH(kp, KINST_GETPROBE(addr), kp_hashnext) {
		if ((uintptr_t)kp->kp_patchpoint == addr)
			break;
	}
	if (kp == NULL)
		return (0);

	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	cpu->cpu_dtrace_caller = stack[0];
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
	dtrace_probe(kp->kp_id, 0, 0, 0, 0, 0);
	cpu->cpu_dtrace_caller = 0;

	kpmd = &kp->kp_md;
	if ((kpmd->flags & KINST_F_CALL) != 0) {
		/*
		 * dtrace_invop_start() reserves space on the stack to
		 * store the return address of the call instruction.
		 */
		retaddr = (uintptr_t)(kp->kp_patchpoint + kpmd->instlen);
		*(uintptr_t *)scratch = retaddr;

		if ((kpmd->flags & KINST_F_DIRECT_CALL) != 0) {
			frame->tf_rip = (uintptr_t)(kp->kp_patchpoint +
			    kpmd->disp + kpmd->instlen);
		} else {
			register_t rval;

			if (kpmd->reg1 == -1 && kpmd->reg2 == -1) {
				/* rip-relative */
				rval = frame->tf_rip - 1 + kpmd->instlen;
			} else {
				/* indirect */
				rval = kinst_regval(frame, kpmd->reg1) +
				    (kinst_regval(frame, kpmd->reg2) <<
				    kpmd->scale);
			}

			if ((kpmd->flags & KINST_F_MOD_DIRECT) != 0) {
				frame->tf_rip = rval + kpmd->disp;
			} else {
				frame->tf_rip =
				    *(uintptr_t *)(rval + kpmd->disp);
			}
		}
		return (DTRACE_INVOP_CALL);
	} else {
		tramp = curthread->t_kinst;
		if (tramp == NULL) {
			/*
			 * A trampoline allocation failed, so this probe is
			 * effectively disabled.  Restore the original
			 * instruction.
			 *
			 * We can't safely print anything here, but the
			 * trampoline allocator should have left a breadcrumb in
			 * the dmesg.
			 */
			kinst_patch_tracepoint(kp, kp->kp_savedval);
			frame->tf_rip = (register_t)kp->kp_patchpoint;
		} else {
			kinst_trampoline_populate(kp, tramp);
			frame->tf_rip = (register_t)tramp;
		}
		return (DTRACE_INVOP_NOP);
	}
}

void
kinst_patch_tracepoint(struct kinst_probe *kp, kinst_patchval_t val)
{
	register_t reg;
	int oldwp;

	reg = intr_disable();
	oldwp = disable_wp();
	*kp->kp_patchpoint = val;
	restore_wp(oldwp);
	intr_restore(reg);
}

static void
kinst_set_disp8(struct kinst_probe *kp, uint8_t byte)
{
	kp->kp_md.disp = (int64_t)(int8_t)byte;
}

static void
kinst_set_disp32(struct kinst_probe *kp, uint8_t *bytes)
{
	int32_t disp32;

	memcpy(&disp32, bytes, sizeof(disp32));
	kp->kp_md.disp = (int64_t)disp32;
}

static int
kinst_dis_get_byte(void *p)
{
	int ret;
	uint8_t **instr = p;

	ret = **instr;
	(*instr)++;

	return (ret);
}

/*
 * Set up all of the state needed to faithfully execute a probed instruction.
 *
 * In the simple case, we copy the instruction unmodified to a per-thread
 * trampoline, wherein it is followed by a jump back to the original code.
 * This is complicated by two factors:
 * - Instructions can have %rip as an operand:
 *   - with %rip-relative addressing encoded in ModR/M, or
 *   - implicitly as a part of the instruction definition (jmp, call).
 * - Call instructions (which may be %rip-relative) need to push the correct
 *   return address onto the stack.
 *
 * Call instructions are simple enough to be emulated in software, so we simply
 * do not use the trampoline mechanism in that case.  kinst_invop() will compute
 * the branch target using the address info computed here (register operands and
 * displacement).
 *
 * %rip-relative operands encoded using the ModR/M byte always use a 32-bit
 * displacement; when populating the trampoline the displacement is adjusted to
 * be relative to the trampoline address.  Trampolines are always allocated
 * above KERNBASE for this reason.
 *
 * For other %rip-relative operands (just jumps) we take the same approach.
 * Instructions which specify an 8-bit displacement must be rewritten to use a
 * 32-bit displacement.
 */
static int
kinst_instr_dissect(struct kinst_probe *kp, uint8_t **instr)
{
	struct kinst_probe_md *kpmd;
	dis86_t d86;
	uint8_t *bytes, modrm, rex;
	int dispoff, i, ilen, opcidx;

	kpmd = &kp->kp_md;

	d86.d86_data = instr;
	d86.d86_get_byte = kinst_dis_get_byte;
	d86.d86_check_func = NULL;
	if (dtrace_disx86(&d86, SIZE64) != 0) {
		KINST_LOG("failed to disassemble instruction at: %p", *instr);
		return (EINVAL);
	}
	bytes = d86.d86_bytes;
	kpmd->instlen = kpmd->tinstlen = d86.d86_len;

	/*
	 * Skip over prefixes, save REX.
	 */
	rex = 0;
	for (i = 0; i < kpmd->instlen; i++) {
		switch (bytes[i]) {
		case 0xf0 ... 0xf3:
			/* group 1 */
			continue;
		case 0x26:
		case 0x2e:
		case 0x36:
		case 0x3e:
		case 0x64:
		case 0x65:
			/* group 2 */
			continue;
		case 0x66:
			/* group 3 */
			continue;
		case 0x67:
			/* group 4 */
			continue;
		case 0x40 ... 0x4f:
			/* REX */
			rex = bytes[i];
			continue;
		}
		break;
	}
	KASSERT(i < kpmd->instlen,
	    ("%s: failed to disassemble instruction at %p", __func__, bytes));
	opcidx = i;
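
	/*
	 * As an illustration, "lock incq (%rax)" is encoded as f0 48 ff 00:
	 * the loop above skips the f0 (lock) prefix, records rex = 0x48 and
	 * leaves opcidx == 2, pointing at the ff opcode byte.
	 */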

	/*
	 * Identify instructions of interest by opcode: calls and jumps.
	 * Extract displacements.
	 */
	dispoff = -1;
	switch (bytes[opcidx]) {
	case 0x0f:
		switch (bytes[opcidx + 1]) {
		case 0x80 ... 0x8f:
			/* conditional jmp near */
			kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
			dispoff = opcidx + 2;
			kinst_set_disp32(kp, &bytes[dispoff]);
			break;
		}
		break;
	case 0xe3:
		/*
		 * jcxz/jrcxz: there is no straightforward way to translate
		 * this instruction to use a 32-bit displacement.  Fortunately,
		 * it is rarely used.
		 */
		return (EINVAL);
	case 0x70 ... 0x7f:
		/* conditional jmp short */
		kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
		dispoff = opcidx + 1;
		kinst_set_disp8(kp, bytes[dispoff]);
		break;
	case 0xe9:
		/* unconditional jmp near */
		kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
		dispoff = opcidx + 1;
		kinst_set_disp32(kp, &bytes[dispoff]);
		break;
	case 0xeb:
		/* unconditional jmp short */
		kpmd->flags |= KINST_F_JMP | KINST_F_RIPREL;
		dispoff = opcidx + 1;
		kinst_set_disp8(kp, bytes[dispoff]);
		break;
	case 0xe8:
	case 0x9a:
		/* direct call */
		kpmd->flags |= KINST_F_CALL | KINST_F_DIRECT_CALL;
		dispoff = opcidx + 1;
		kinst_set_disp32(kp, &bytes[dispoff]);
		break;
	case 0xff:
		KASSERT(d86.d86_got_modrm,
		    ("no ModR/M byte for instr at %p", *instr - kpmd->instlen));
		switch (KINST_MODRM_REG(bytes[d86.d86_rmindex])) {
		case 0x02:
		case 0x03:
			/* indirect call */
			kpmd->flags |= KINST_F_CALL;
			break;
		case 0x04:
		case 0x05:
			/* indirect jump */
			kpmd->flags |= KINST_F_JMP;
			break;
		}
	}

	/*
	 * If there's a ModR/M byte, we need to check it to see if the operand
	 * is %rip-relative, and rewrite the displacement if so.  If not, we
	 * might still have to extract operand info if this is a call
	 * instruction.
	 */
	if (d86.d86_got_modrm) {
		uint8_t mod, rm, sib;

		kpmd->reg1 = kpmd->reg2 = -1;

		modrm = bytes[d86.d86_rmindex];
		mod = KINST_MODRM_MOD(modrm);
		rm = KINST_MODRM_RM(modrm);
		if (mod == 0 && rm == 5) {
			kpmd->flags |= KINST_F_RIPREL;
			dispoff = d86.d86_rmindex + 1;
			kinst_set_disp32(kp, &bytes[dispoff]);
		} else if ((kpmd->flags & KINST_F_CALL) != 0) {
			bool havesib;

			havesib = (mod != 3 && rm == 4);
			dispoff = d86.d86_rmindex + (havesib ? 2 : 1);
			if (mod == 1)
				kinst_set_disp8(kp, bytes[dispoff]);
			else if (mod == 2)
				kinst_set_disp32(kp, &bytes[dispoff]);
			else if (mod == 3)
				kpmd->flags |= KINST_F_MOD_DIRECT;

			if (havesib) {
				sib = bytes[d86.d86_rmindex + 1];
				if (KINST_SIB_BASE(sib) != 5) {
					kpmd->reg1 = KINST_SIB_BASE(sib) |
					    (KINST_REX_B(rex) << 3);
				}
				kpmd->scale = KINST_SIB_SCALE(sib);
				kpmd->reg2 = KINST_SIB_INDEX(sib) |
				    (KINST_REX_X(rex) << 3);
			} else {
				kpmd->reg1 = rm | (KINST_REX_B(rex) << 3);
			}
		}
	}

	/*
	 * Calls are emulated in software; once operands are decoded we have
	 * nothing else to do.
	 */
	if ((kpmd->flags & KINST_F_CALL) != 0)
		return (0);

	/*
	 * Allocate and populate an instruction trampoline template.
	 *
	 * Position-independent instructions can simply be copied, but
	 * position-dependent instructions require some surgery: jump
	 * instructions with an 8-bit displacement need to be converted to use a
	 * 32-bit displacement, and the adjusted displacement needs to be
	 * computed.
	 */
	ilen = kpmd->instlen;
	if ((kpmd->flags & KINST_F_RIPREL) != 0) {
		if ((kpmd->flags & KINST_F_JMP) == 0 ||
		    bytes[opcidx] == 0x0f ||
		    bytes[opcidx] == 0xe9 ||
		    bytes[opcidx] == 0xff) {
			memcpy(kpmd->template, bytes, dispoff);
			memcpy(&kpmd->template[dispoff + 4],
			    &bytes[dispoff + 4], ilen - (dispoff + 4));
			kpmd->dispoff = dispoff;
		} else if (bytes[opcidx] == 0xeb) {
			memcpy(kpmd->template, bytes, opcidx);
			kpmd->template[opcidx] = 0xe9;
			kpmd->dispoff = opcidx + 1;

			/*
			 * Instruction length changes from 2 to 5.
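			 * The size difference is folded into the displacement
			 * so that, once kinst_riprel_disp() rebases it to the
			 * trampoline, the rewritten jump still reaches the
			 * original target.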
			 */
			kpmd->tinstlen = 5;
			kpmd->disp -= 3;
		} else if (bytes[opcidx] >= 0x70 && bytes[opcidx] <= 0x7f) {
			memcpy(kpmd->template, bytes, opcidx);
			kpmd->template[opcidx] = 0x0f;
			kpmd->template[opcidx + 1] = bytes[opcidx] + 0x10;
			kpmd->dispoff = opcidx + 2;

			/* Instruction length changes from 2 to 6. */
			kpmd->tinstlen = 6;
			kpmd->disp -= 4;
		} else {
			panic("unhandled opcode %#x", bytes[opcidx]);
		}
	} else {
		memcpy(kpmd->template, bytes, ilen);
	}

	return (0);
}

int
kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
    void *opaque)
{
	struct kinst_probe *kp;
	dtrace_kinst_probedesc_t *pd;
	const char *func;
	int error, n, off;
	uint8_t *instr, *limit;

	pd = opaque;
	func = symval->name;
	if (strcmp(func, pd->kpd_func) != 0 || strcmp(func, "trap_check") == 0)
		return (0);

	instr = (uint8_t *)symval->value;
	limit = (uint8_t *)symval->value + symval->size;
	if (instr >= limit)
		return (0);

	/*
	 * Ignore functions not beginning with the usual function prologue.
	 * These might correspond to assembly routines with which we should not
	 * meddle.
	 */
	if (*instr != KINST_PUSHL_RBP)
		return (0);

	n = 0;
	while (instr < limit) {
		off = (int)(instr - (uint8_t *)symval->value);
		if (pd->kpd_off != -1 && off != pd->kpd_off) {
			instr += dtrace_instr_size(instr);
			continue;
		}

		/*
		 * Prevent separate dtrace(1) instances from creating copies of
		 * the same probe.
		 */
		LIST_FOREACH(kp, KINST_GETPROBE(instr), kp_hashnext) {
			if (strcmp(kp->kp_func, func) == 0 &&
			    strtol(kp->kp_name, NULL, 10) == off)
				return (0);
		}
		if (++n > KINST_PROBETAB_MAX) {
			KINST_LOG("probe list full: %d entries", n);
			return (ENOMEM);
		}
		kp = malloc(sizeof(struct kinst_probe), M_KINST,
		    M_WAITOK | M_ZERO);
		kp->kp_func = func;
		snprintf(kp->kp_name, sizeof(kp->kp_name), "%d", off);
		kp->kp_savedval = *instr;
		kp->kp_patchval = KINST_PATCHVAL;
		kp->kp_patchpoint = instr;

		error = kinst_instr_dissect(kp, &instr);
		if (error != 0)
			return (error);

		kinst_probe_create(kp, lf);
	}

	return (0);
}