1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 #include <sys/cdefs.h> 27 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/dtrace_impl.h> 31 #include <sys/kernel.h> 32 #include <sys/stack.h> 33 #include <sys/pcpu.h> 34 35 #include <cddl/dev/dtrace/dtrace_cddl.h> 36 37 #include <machine/frame.h> 38 #include <machine/md_var.h> 39 #include <machine/stack.h> 40 #include <x86/ifunc.h> 41 42 #include <vm/vm.h> 43 #include <vm/vm_param.h> 44 #include <vm/pmap.h> 45 46 #include "regset.h" 47 48 uint8_t dtrace_fuword8_nocheck(void *); 49 uint16_t dtrace_fuword16_nocheck(void *); 50 uint32_t dtrace_fuword32_nocheck(void *); 51 uint64_t dtrace_fuword64_nocheck(void *); 52 53 int dtrace_ustackdepth_max = 2048; 54 55 void 56 dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, 57 uint32_t *intrpc) 58 { 59 struct thread *td; 60 int depth = 0; 61 register_t rbp; 62 struct amd64_frame *frame; 63 vm_offset_t callpc; 64 pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller; 65 66 if (intrpc != 0) 67 pcstack[depth++] = (pc_t) intrpc; 68 69 aframes++; 70 71 __asm __volatile("movq %%rbp,%0" : "=r" (rbp)); 72 73 frame = (struct amd64_frame *)rbp; 74 td = curthread; 75 while (depth < pcstack_limit) { 76 if (!kstack_contains(curthread, (vm_offset_t)frame, 77 sizeof(*frame))) 78 break; 79 80 callpc = frame->f_retaddr; 81 82 if (!INKERNEL(callpc)) 83 break; 84 85 if (aframes > 0) { 86 aframes--; 87 if ((aframes == 0) && (caller != 0)) { 88 pcstack[depth++] = caller; 89 } 90 } else { 91 pcstack[depth++] = callpc; 92 } 93 94 if ((vm_offset_t)frame->f_frame <= (vm_offset_t)frame) 95 break; 96 frame = frame->f_frame; 97 } 98 99 for (; depth < pcstack_limit; depth++) { 100 pcstack[depth] = 0; 101 } 102 } 103 104 static int 105 dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, 106 uintptr_t sp) 107 { 108 uintptr_t oldsp; 109 volatile uint16_t *flags = 110 (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; 111 int ret = 0; 112 113 ASSERT(pcstack == NULL || pcstack_limit > 0); 114 ASSERT(dtrace_ustackdepth_max > 0); 115 116 while (pc != 0) { 117 /* 118 * We limit the number of times we can go around this 119 * loop to account for a circular stack. 120 */ 121 if (ret++ >= dtrace_ustackdepth_max) { 122 *flags |= CPU_DTRACE_BADSTACK; 123 cpu_core[curcpu].cpuc_dtrace_illval = sp; 124 break; 125 } 126 127 if (pcstack != NULL) { 128 *pcstack++ = (uint64_t)pc; 129 pcstack_limit--; 130 if (pcstack_limit <= 0) 131 break; 132 } 133 134 if (sp == 0) 135 break; 136 137 oldsp = sp; 138 139 pc = dtrace_fuword64((void *)(sp + 140 offsetof(struct amd64_frame, f_retaddr))); 141 sp = dtrace_fuword64((void *)sp); 142 143 if (sp == oldsp) { 144 *flags |= CPU_DTRACE_BADSTACK; 145 cpu_core[curcpu].cpuc_dtrace_illval = sp; 146 break; 147 } 148 149 /* 150 * This is totally bogus: if we faulted, we're going to clear 151 * the fault and break. This is to deal with the apparently 152 * broken Java stacks on x86. 153 */ 154 if (*flags & CPU_DTRACE_FAULT) { 155 *flags &= ~CPU_DTRACE_FAULT; 156 break; 157 } 158 } 159 160 return (ret); 161 } 162 163 void 164 dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) 165 { 166 proc_t *p = curproc; 167 struct trapframe *tf; 168 uintptr_t pc, sp, fp; 169 volatile uint16_t *flags = 170 (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; 171 int n; 172 173 if (*flags & CPU_DTRACE_FAULT) 174 return; 175 176 if (pcstack_limit <= 0) 177 return; 178 179 /* 180 * If there's no user context we still need to zero the stack. 181 */ 182 if (p == NULL || (tf = curthread->td_frame) == NULL) 183 goto zero; 184 185 *pcstack++ = (uint64_t)p->p_pid; 186 pcstack_limit--; 187 188 if (pcstack_limit <= 0) 189 return; 190 191 pc = tf->tf_rip; 192 fp = tf->tf_rbp; 193 sp = tf->tf_rsp; 194 195 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { 196 /* 197 * In an entry probe. The frame pointer has not yet been 198 * pushed (that happens in the function prologue). The 199 * best approach is to add the current pc as a missing top 200 * of stack and back the pc up to the caller, which is stored 201 * at the current stack pointer address since the call 202 * instruction puts it there right before the branch. 203 */ 204 205 *pcstack++ = (uint64_t)pc; 206 pcstack_limit--; 207 if (pcstack_limit <= 0) 208 return; 209 210 pc = dtrace_fuword64((void *) sp); 211 } 212 213 n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp); 214 ASSERT(n >= 0); 215 ASSERT(n <= pcstack_limit); 216 217 pcstack += n; 218 pcstack_limit -= n; 219 220 zero: 221 while (pcstack_limit-- > 0) 222 *pcstack++ = 0; 223 } 224 225 int 226 dtrace_getustackdepth(void) 227 { 228 proc_t *p = curproc; 229 struct trapframe *tf; 230 uintptr_t pc, fp, sp; 231 int n = 0; 232 233 if (p == NULL || (tf = curthread->td_frame) == NULL) 234 return (0); 235 236 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) 237 return (-1); 238 239 pc = tf->tf_rip; 240 fp = tf->tf_rbp; 241 sp = tf->tf_rsp; 242 243 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { 244 /* 245 * In an entry probe. The frame pointer has not yet been 246 * pushed (that happens in the function prologue). The 247 * best approach is to add the current pc as a missing top 248 * of stack and back the pc up to the caller, which is stored 249 * at the current stack pointer address since the call 250 * instruction puts it there right before the branch. 251 */ 252 253 pc = dtrace_fuword64((void *) sp); 254 n++; 255 } 256 257 n += dtrace_getustack_common(NULL, 0, pc, fp); 258 259 return (n); 260 } 261 262 void 263 dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) 264 { 265 proc_t *p = curproc; 266 struct trapframe *tf; 267 uintptr_t pc, sp, fp; 268 volatile uint16_t *flags = 269 (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; 270 #ifdef notyet /* XXX signal stack */ 271 uintptr_t oldcontext; 272 size_t s1, s2; 273 #endif 274 275 if (*flags & CPU_DTRACE_FAULT) 276 return; 277 278 if (pcstack_limit <= 0) 279 return; 280 281 /* 282 * If there's no user context we still need to zero the stack. 283 */ 284 if (p == NULL || (tf = curthread->td_frame) == NULL) 285 goto zero; 286 287 *pcstack++ = (uint64_t)p->p_pid; 288 pcstack_limit--; 289 290 if (pcstack_limit <= 0) 291 return; 292 293 pc = tf->tf_rip; 294 sp = tf->tf_rsp; 295 fp = tf->tf_rbp; 296 297 #ifdef notyet /* XXX signal stack */ 298 oldcontext = lwp->lwp_oldcontext; 299 s1 = sizeof (struct xframe) + 2 * sizeof (long); 300 s2 = s1 + sizeof (siginfo_t); 301 #endif 302 303 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { 304 *pcstack++ = (uint64_t)pc; 305 *fpstack++ = 0; 306 pcstack_limit--; 307 if (pcstack_limit <= 0) 308 return; 309 310 pc = dtrace_fuword64((void *)sp); 311 } 312 313 while (pc != 0) { 314 *pcstack++ = (uint64_t)pc; 315 *fpstack++ = fp; 316 pcstack_limit--; 317 if (pcstack_limit <= 0) 318 break; 319 320 if (fp == 0) 321 break; 322 323 #ifdef notyet /* XXX signal stack */ 324 if (oldcontext == sp + s1 || oldcontext == sp + s2) { 325 ucontext_t *ucp = (ucontext_t *)oldcontext; 326 greg_t *gregs = ucp->uc_mcontext.gregs; 327 328 sp = dtrace_fulword(&gregs[REG_FP]); 329 pc = dtrace_fulword(&gregs[REG_PC]); 330 331 oldcontext = dtrace_fulword(&ucp->uc_link); 332 } else 333 #endif /* XXX */ 334 { 335 pc = dtrace_fuword64((void *)(fp + 336 offsetof(struct amd64_frame, f_retaddr))); 337 fp = dtrace_fuword64((void *)fp); 338 } 339 340 /* 341 * This is totally bogus: if we faulted, we're going to clear 342 * the fault and break. This is to deal with the apparently 343 * broken Java stacks on x86. 344 */ 345 if (*flags & CPU_DTRACE_FAULT) { 346 *flags &= ~CPU_DTRACE_FAULT; 347 break; 348 } 349 } 350 351 zero: 352 while (pcstack_limit-- > 0) 353 *pcstack++ = 0; 354 } 355 356 /*ARGSUSED*/ 357 uint64_t 358 dtrace_getarg(int arg, int aframes) 359 { 360 struct thread *td; 361 uintptr_t val; 362 struct amd64_frame *fp = (struct amd64_frame *)dtrace_getfp(); 363 uintptr_t *stack; 364 int i; 365 366 /* 367 * A total of 6 arguments are passed via registers; any argument with 368 * index of 5 or lower is therefore in a register. 369 */ 370 int inreg = 5; 371 372 /* 373 * Did we arrive here via dtrace_invop()? We can simply fetch arguments 374 * from the trap frame if so. 375 */ 376 td = curthread; 377 if (td->t_dtrace_trapframe != NULL) { 378 struct trapframe *tf = td->t_dtrace_trapframe; 379 380 if (arg <= inreg) { 381 switch (arg) { 382 case 0: 383 return (tf->tf_rdi); 384 case 1: 385 return (tf->tf_rsi); 386 case 2: 387 return (tf->tf_rdx); 388 case 3: 389 return (tf->tf_rcx); 390 case 4: 391 return (tf->tf_r8); 392 case 5: 393 return (tf->tf_r9); 394 } 395 } 396 397 arg -= inreg; 398 stack = (uintptr_t *)tf->tf_rsp; 399 goto load; 400 } 401 402 for (i = 1; i <= aframes; i++) 403 fp = fp->f_frame; 404 405 /* 406 * We know that we did not come through a trap to get into 407 * dtrace_probe() -- the provider simply called dtrace_probe() 408 * directly. As this is the case, we need to shift the argument 409 * that we're looking for: the probe ID is the first argument to 410 * dtrace_probe(), so the argument n will actually be found where 411 * one would expect to find argument (n + 1). 412 */ 413 arg++; 414 415 if (arg <= inreg) { 416 /* 417 * This shouldn't happen. If the argument is passed in a 418 * register then it should have been, well, passed in a 419 * register... 420 */ 421 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 422 return (0); 423 } 424 425 arg -= (inreg + 1); 426 stack = (uintptr_t *)&fp[1]; 427 428 load: 429 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 430 val = stack[arg]; 431 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 432 433 return (val); 434 } 435 436 int 437 dtrace_getstackdepth(int aframes) 438 { 439 int depth = 0; 440 struct amd64_frame *frame; 441 vm_offset_t rbp; 442 443 aframes++; 444 rbp = dtrace_getfp(); 445 frame = (struct amd64_frame *)rbp; 446 depth++; 447 for(;;) { 448 if (!kstack_contains(curthread, (vm_offset_t)frame, 449 sizeof(*frame))) 450 break; 451 depth++; 452 if (frame->f_frame <= frame) 453 break; 454 frame = frame->f_frame; 455 } 456 if (depth < aframes) 457 return 0; 458 else 459 return depth - aframes; 460 } 461 462 ulong_t 463 dtrace_getreg(struct trapframe *frame, uint_t reg) 464 { 465 /* This table is dependent on reg.d. */ 466 int regmap[] = { 467 REG_GS, /* 0 GS */ 468 REG_FS, /* 1 FS */ 469 REG_ES, /* 2 ES */ 470 REG_DS, /* 3 DS */ 471 REG_RDI, /* 4 EDI */ 472 REG_RSI, /* 5 ESI */ 473 REG_RBP, /* 6 EBP, REG_FP */ 474 REG_RSP, /* 7 ESP */ 475 REG_RBX, /* 8 EBX, REG_R1 */ 476 REG_RDX, /* 9 EDX */ 477 REG_RCX, /* 10 ECX */ 478 REG_RAX, /* 11 EAX, REG_R0 */ 479 REG_TRAPNO, /* 12 TRAPNO */ 480 REG_ERR, /* 13 ERR */ 481 REG_RIP, /* 14 EIP, REG_PC */ 482 REG_CS, /* 15 CS */ 483 REG_RFL, /* 16 EFL, REG_PS */ 484 REG_RSP, /* 17 UESP, REG_SP */ 485 REG_SS /* 18 SS */ 486 }; 487 488 if (reg <= GS) { 489 if (reg >= sizeof (regmap) / sizeof (int)) { 490 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 491 return (0); 492 } 493 494 reg = regmap[reg]; 495 } else { 496 /* This is dependent on reg.d. */ 497 reg -= GS + 1; 498 } 499 500 switch (reg) { 501 case REG_RDI: 502 return (frame->tf_rdi); 503 case REG_RSI: 504 return (frame->tf_rsi); 505 case REG_RDX: 506 return (frame->tf_rdx); 507 case REG_RCX: 508 return (frame->tf_rcx); 509 case REG_R8: 510 return (frame->tf_r8); 511 case REG_R9: 512 return (frame->tf_r9); 513 case REG_RAX: 514 return (frame->tf_rax); 515 case REG_RBX: 516 return (frame->tf_rbx); 517 case REG_RBP: 518 return (frame->tf_rbp); 519 case REG_R10: 520 return (frame->tf_r10); 521 case REG_R11: 522 return (frame->tf_r11); 523 case REG_R12: 524 return (frame->tf_r12); 525 case REG_R13: 526 return (frame->tf_r13); 527 case REG_R14: 528 return (frame->tf_r14); 529 case REG_R15: 530 return (frame->tf_r15); 531 case REG_DS: 532 return (frame->tf_ds); 533 case REG_ES: 534 return (frame->tf_es); 535 case REG_FS: 536 return (frame->tf_fs); 537 case REG_GS: 538 return (frame->tf_gs); 539 case REG_TRAPNO: 540 return (frame->tf_trapno); 541 case REG_ERR: 542 return (frame->tf_err); 543 case REG_RIP: 544 return (frame->tf_rip); 545 case REG_CS: 546 return (frame->tf_cs); 547 case REG_SS: 548 return (frame->tf_ss); 549 case REG_RFL: 550 return (frame->tf_rflags); 551 case REG_RSP: 552 return (frame->tf_rsp); 553 default: 554 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 555 return (0); 556 } 557 } 558 559 static int 560 dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) 561 { 562 ASSERT(INKERNEL(kaddr) && kaddr + size >= kaddr); 563 564 if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) { 565 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 566 cpu_core[curcpu].cpuc_dtrace_illval = uaddr; 567 return (0); 568 } 569 570 return (1); 571 } 572 573 void 574 dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, 575 volatile uint16_t *flags) 576 { 577 if (dtrace_copycheck(uaddr, kaddr, size)) 578 dtrace_copy(uaddr, kaddr, size); 579 } 580 581 void 582 dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size, 583 volatile uint16_t *flags) 584 { 585 if (dtrace_copycheck(uaddr, kaddr, size)) 586 dtrace_copy(kaddr, uaddr, size); 587 } 588 589 void 590 dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, 591 volatile uint16_t *flags) 592 { 593 if (dtrace_copycheck(uaddr, kaddr, size)) 594 dtrace_copystr(uaddr, kaddr, size, flags); 595 } 596 597 void 598 dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size, 599 volatile uint16_t *flags) 600 { 601 if (dtrace_copycheck(uaddr, kaddr, size)) 602 dtrace_copystr(kaddr, uaddr, size, flags); 603 } 604 605 uint8_t 606 dtrace_fuword8(void *uaddr) 607 { 608 if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { 609 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 610 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 611 return (0); 612 } 613 return (dtrace_fuword8_nocheck(uaddr)); 614 } 615 616 uint16_t 617 dtrace_fuword16(void *uaddr) 618 { 619 if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { 620 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 621 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 622 return (0); 623 } 624 return (dtrace_fuword16_nocheck(uaddr)); 625 } 626 627 uint32_t 628 dtrace_fuword32(void *uaddr) 629 { 630 if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { 631 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 632 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 633 return (0); 634 } 635 return (dtrace_fuword32_nocheck(uaddr)); 636 } 637 638 uint64_t 639 dtrace_fuword64(void *uaddr) 640 { 641 if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { 642 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 643 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 644 return (0); 645 } 646 return (dtrace_fuword64_nocheck(uaddr)); 647 } 648 649 /* 650 * ifunc resolvers for SMAP support 651 */ 652 void dtrace_copy_nosmap(uintptr_t, uintptr_t, size_t); 653 void dtrace_copy_smap(uintptr_t, uintptr_t, size_t); 654 DEFINE_IFUNC(, void, dtrace_copy, (uintptr_t, uintptr_t, size_t)) 655 { 656 657 return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? 658 dtrace_copy_smap : dtrace_copy_nosmap); 659 } 660 661 void dtrace_copystr_nosmap(uintptr_t, uintptr_t, size_t, volatile uint16_t *); 662 void dtrace_copystr_smap(uintptr_t, uintptr_t, size_t, volatile uint16_t *); 663 DEFINE_IFUNC(, void, dtrace_copystr, (uintptr_t, uintptr_t, size_t, 664 volatile uint16_t *)) 665 { 666 667 return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? 668 dtrace_copystr_smap : dtrace_copystr_nosmap); 669 } 670 671 uintptr_t dtrace_fulword_nosmap(void *); 672 uintptr_t dtrace_fulword_smap(void *); 673 DEFINE_IFUNC(, uintptr_t, dtrace_fulword, (void *)) 674 { 675 676 return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? 677 dtrace_fulword_smap : dtrace_fulword_nosmap); 678 } 679 680 uint8_t dtrace_fuword8_nocheck_nosmap(void *); 681 uint8_t dtrace_fuword8_nocheck_smap(void *); 682 DEFINE_IFUNC(, uint8_t, dtrace_fuword8_nocheck, (void *)) 683 { 684 685 return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? 686 dtrace_fuword8_nocheck_smap : dtrace_fuword8_nocheck_nosmap); 687 } 688 689 uint16_t dtrace_fuword16_nocheck_nosmap(void *); 690 uint16_t dtrace_fuword16_nocheck_smap(void *); 691 DEFINE_IFUNC(, uint16_t, dtrace_fuword16_nocheck, (void *)) 692 { 693 694 return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? 695 dtrace_fuword16_nocheck_smap : dtrace_fuword16_nocheck_nosmap); 696 } 697 698 uint32_t dtrace_fuword32_nocheck_nosmap(void *); 699 uint32_t dtrace_fuword32_nocheck_smap(void *); 700 DEFINE_IFUNC(, uint32_t, dtrace_fuword32_nocheck, (void *)) 701 { 702 703 return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? 704 dtrace_fuword32_nocheck_smap : dtrace_fuword32_nocheck_nosmap); 705 } 706 707 uint64_t dtrace_fuword64_nocheck_nosmap(void *); 708 uint64_t dtrace_fuword64_nocheck_smap(void *); 709 DEFINE_IFUNC(, uint64_t, dtrace_fuword64_nocheck, (void *)) 710 { 711 712 return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ? 713 dtrace_fuword64_nocheck_smap : dtrace_fuword64_nocheck_nosmap); 714 } 715