1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 * 22 * Portions Copyright 2012,2013 Justin Hibbits <jhibbits@freebsd.org> 23 */ 24 /* 25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 #include <sys/cdefs.h> 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/dtrace_impl.h> 33 #include <sys/kernel.h> 34 #include <sys/stack.h> 35 #include <sys/sysent.h> 36 #include <sys/pcpu.h> 37 38 #include <machine/frame.h> 39 #include <machine/md_var.h> 40 #include <machine/psl.h> 41 #include <machine/reg.h> 42 #include <machine/stack.h> 43 44 #include <vm/vm.h> 45 #include <vm/vm_param.h> 46 #include <vm/pmap.h> 47 48 #include "regset.h" 49 50 /* Offset to the LR Save word (ppc32) */ 51 #define RETURN_OFFSET 4 52 /* Offset to LR Save word (ppc64). CR Save area sits between back chain and LR */ 53 #define RETURN_OFFSET64 16 54 55 #ifdef __powerpc64__ 56 #define OFFSET 4 /* Account for the TOC reload slot */ 57 #define FRAME_OFFSET 48 58 #else 59 #define OFFSET 0 60 #define FRAME_OFFSET 8 61 #endif 62 63 #define INKERNEL(x) (((x) <= VM_MAX_KERNEL_ADDRESS && \ 64 (x) >= VM_MIN_KERNEL_ADDRESS) || \ 65 (PMAP_HAS_DMAP && (x) >= DMAP_BASE_ADDRESS && \ 66 (x) <= DMAP_MAX_ADDRESS)) 67 68 static __inline int 69 dtrace_sp_inkernel(uintptr_t sp) 70 { 71 struct trapframe *frame; 72 vm_offset_t callpc; 73 74 /* Not within the kernel, or not aligned. */ 75 if (!INKERNEL(sp) || (sp & 0xf) != 0) 76 return (0); 77 #ifdef __powerpc64__ 78 callpc = *(vm_offset_t *)(sp + RETURN_OFFSET64); 79 #else 80 callpc = *(vm_offset_t *)(sp + RETURN_OFFSET); 81 #endif 82 if ((callpc & 3) || (callpc < 0x100)) 83 return (0); 84 85 /* 86 * trapexit() and asttrapexit() are sentinels 87 * for kernel stack tracing. 88 */ 89 if (callpc + OFFSET == (vm_offset_t) &trapexit || 90 callpc + OFFSET == (vm_offset_t) &asttrapexit) { 91 frame = (struct trapframe *)(sp + FRAME_OFFSET); 92 93 return ((frame->srr1 & PSL_PR) == 0); 94 } 95 96 return (1); 97 } 98 99 static __inline void 100 dtrace_next_sp_pc(uintptr_t sp, uintptr_t *nsp, uintptr_t *pc, uintptr_t *lr) 101 { 102 vm_offset_t callpc; 103 struct trapframe *frame; 104 105 if (lr != 0 && *lr != 0) 106 callpc = *lr; 107 else 108 #ifdef __powerpc64__ 109 callpc = *(vm_offset_t *)(sp + RETURN_OFFSET64); 110 #else 111 callpc = *(vm_offset_t *)(sp + RETURN_OFFSET); 112 #endif 113 114 /* 115 * trapexit() and asttrapexit() are sentinels 116 * for kernel stack tracing. 117 */ 118 if ((callpc + OFFSET == (vm_offset_t) &trapexit || 119 callpc + OFFSET == (vm_offset_t) &asttrapexit)) { 120 /* Access the trap frame */ 121 frame = (struct trapframe *)(sp + FRAME_OFFSET); 122 123 if (nsp != NULL) 124 *nsp = frame->fixreg[1]; 125 if (pc != NULL) 126 *pc = frame->srr0; 127 if (lr != NULL) 128 *lr = frame->lr; 129 return; 130 } 131 132 if (nsp != NULL) 133 *nsp = *(uintptr_t *)sp; 134 if (pc != NULL) 135 *pc = callpc; 136 /* lr is only valid for trap frames */ 137 if (lr != NULL) 138 *lr = 0; 139 } 140 141 void 142 dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, 143 uint32_t *intrpc) 144 { 145 int depth = 0; 146 uintptr_t osp, sp, lr = 0; 147 vm_offset_t callpc; 148 pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller; 149 150 osp = PAGE_SIZE; 151 if (intrpc != 0) 152 pcstack[depth++] = (pc_t) intrpc; 153 154 aframes++; 155 156 sp = (uintptr_t)__builtin_frame_address(0); 157 158 while (depth < pcstack_limit) { 159 if (sp <= osp) 160 break; 161 162 if (!dtrace_sp_inkernel(sp)) 163 break; 164 osp = sp; 165 dtrace_next_sp_pc(osp, &sp, &callpc, &lr); 166 167 if (aframes > 0) { 168 aframes--; 169 if ((aframes == 0) && (caller != 0)) { 170 pcstack[depth++] = caller; 171 } 172 } 173 else { 174 pcstack[depth++] = callpc; 175 } 176 } 177 178 for (; depth < pcstack_limit; depth++) { 179 pcstack[depth] = 0; 180 } 181 } 182 183 static int 184 dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, 185 uintptr_t sp) 186 { 187 proc_t *p = curproc; 188 int ret = 0; 189 190 ASSERT(pcstack == NULL || pcstack_limit > 0); 191 192 while (pc != 0) { 193 ret++; 194 if (pcstack != NULL) { 195 *pcstack++ = (uint64_t)pc; 196 pcstack_limit--; 197 if (pcstack_limit <= 0) 198 break; 199 } 200 201 if (sp == 0) 202 break; 203 204 if (SV_PROC_FLAG(p, SV_ILP32)) { 205 pc = dtrace_fuword32((void *)(sp + RETURN_OFFSET)); 206 sp = dtrace_fuword32((void *)sp); 207 } 208 else { 209 pc = dtrace_fuword64((void *)(sp + RETURN_OFFSET64)); 210 sp = dtrace_fuword64((void *)sp); 211 } 212 } 213 214 return (ret); 215 } 216 217 void 218 dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) 219 { 220 proc_t *p = curproc; 221 struct trapframe *tf; 222 uintptr_t pc, sp; 223 volatile uint16_t *flags = 224 (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; 225 int n; 226 227 if (*flags & CPU_DTRACE_FAULT) 228 return; 229 230 if (pcstack_limit <= 0) 231 return; 232 233 /* 234 * If there's no user context we still need to zero the stack. 235 */ 236 if (p == NULL || (tf = curthread->td_frame) == NULL) 237 goto zero; 238 239 *pcstack++ = (uint64_t)p->p_pid; 240 pcstack_limit--; 241 242 if (pcstack_limit <= 0) 243 return; 244 245 pc = tf->srr0; 246 sp = tf->fixreg[1]; 247 248 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { 249 /* 250 * In an entry probe. The frame pointer has not yet been 251 * pushed (that happens in the function prologue). The 252 * best approach is to add the current pc as a missing top 253 * of stack and back the pc up to the caller, which is stored 254 * at the current stack pointer address since the call 255 * instruction puts it there right before the branch. 256 */ 257 258 *pcstack++ = (uint64_t)pc; 259 pcstack_limit--; 260 if (pcstack_limit <= 0) 261 return; 262 263 pc = tf->lr; 264 } 265 266 n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp); 267 ASSERT(n >= 0); 268 ASSERT(n <= pcstack_limit); 269 270 pcstack += n; 271 pcstack_limit -= n; 272 273 zero: 274 while (pcstack_limit-- > 0) 275 *pcstack++ = 0; 276 } 277 278 int 279 dtrace_getustackdepth(void) 280 { 281 proc_t *p = curproc; 282 struct trapframe *tf; 283 uintptr_t pc, sp; 284 int n = 0; 285 286 if (p == NULL || (tf = curthread->td_frame) == NULL) 287 return (0); 288 289 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) 290 return (-1); 291 292 pc = tf->srr0; 293 sp = tf->fixreg[1]; 294 295 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { 296 /* 297 * In an entry probe. The frame pointer has not yet been 298 * pushed (that happens in the function prologue). The 299 * best approach is to add the current pc as a missing top 300 * of stack and back the pc up to the caller, which is stored 301 * at the current stack pointer address since the call 302 * instruction puts it there right before the branch. 303 */ 304 305 if (SV_PROC_FLAG(p, SV_ILP32)) { 306 pc = dtrace_fuword32((void *) sp); 307 } 308 else 309 pc = dtrace_fuword64((void *) sp); 310 n++; 311 } 312 313 n += dtrace_getustack_common(NULL, 0, pc, sp); 314 315 return (n); 316 } 317 318 void 319 dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) 320 { 321 proc_t *p = curproc; 322 struct trapframe *tf; 323 uintptr_t pc, sp; 324 volatile uint16_t *flags = 325 (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; 326 #ifdef notyet /* XXX signal stack */ 327 uintptr_t oldcontext; 328 size_t s1, s2; 329 #endif 330 331 if (*flags & CPU_DTRACE_FAULT) 332 return; 333 334 if (pcstack_limit <= 0) 335 return; 336 337 /* 338 * If there's no user context we still need to zero the stack. 339 */ 340 if (p == NULL || (tf = curthread->td_frame) == NULL) 341 goto zero; 342 343 *pcstack++ = (uint64_t)p->p_pid; 344 pcstack_limit--; 345 346 if (pcstack_limit <= 0) 347 return; 348 349 pc = tf->srr0; 350 sp = tf->fixreg[1]; 351 352 #ifdef notyet /* XXX signal stack */ 353 oldcontext = lwp->lwp_oldcontext; 354 s1 = sizeof (struct xframe) + 2 * sizeof (long); 355 s2 = s1 + sizeof (siginfo_t); 356 #endif 357 358 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { 359 *pcstack++ = (uint64_t)pc; 360 *fpstack++ = 0; 361 pcstack_limit--; 362 if (pcstack_limit <= 0) 363 return; 364 365 if (SV_PROC_FLAG(p, SV_ILP32)) { 366 pc = dtrace_fuword32((void *)sp); 367 } 368 else { 369 pc = dtrace_fuword64((void *)sp); 370 } 371 } 372 373 while (pc != 0) { 374 *pcstack++ = (uint64_t)pc; 375 *fpstack++ = sp; 376 pcstack_limit--; 377 if (pcstack_limit <= 0) 378 break; 379 380 if (sp == 0) 381 break; 382 383 #ifdef notyet /* XXX signal stack */ 384 if (oldcontext == sp + s1 || oldcontext == sp + s2) { 385 ucontext_t *ucp = (ucontext_t *)oldcontext; 386 greg_t *gregs = ucp->uc_mcontext.gregs; 387 388 sp = dtrace_fulword(&gregs[REG_FP]); 389 pc = dtrace_fulword(&gregs[REG_PC]); 390 391 oldcontext = dtrace_fulword(&ucp->uc_link); 392 } else 393 #endif /* XXX */ 394 { 395 if (SV_PROC_FLAG(p, SV_ILP32)) { 396 pc = dtrace_fuword32((void *)(sp + RETURN_OFFSET)); 397 sp = dtrace_fuword32((void *)sp); 398 } 399 else { 400 pc = dtrace_fuword64((void *)(sp + RETURN_OFFSET64)); 401 sp = dtrace_fuword64((void *)sp); 402 } 403 } 404 405 /* 406 * This is totally bogus: if we faulted, we're going to clear 407 * the fault and break. This is to deal with the apparently 408 * broken Java stacks on x86. 409 */ 410 if (*flags & CPU_DTRACE_FAULT) { 411 *flags &= ~CPU_DTRACE_FAULT; 412 break; 413 } 414 } 415 416 zero: 417 while (pcstack_limit-- > 0) 418 *pcstack++ = 0; 419 } 420 421 /*ARGSUSED*/ 422 uint64_t 423 dtrace_getarg(int arg, int aframes) 424 { 425 uintptr_t val; 426 uintptr_t *fp = (uintptr_t *)__builtin_frame_address(0); 427 uintptr_t *stack; 428 int i; 429 430 /* 431 * A total of 8 arguments are passed via registers; any argument with 432 * index of 7 or lower is therefore in a register. 433 */ 434 int inreg = 7; 435 436 for (i = 1; i <= aframes; i++) { 437 fp = (uintptr_t *)*fp; 438 439 /* 440 * On ppc32 trapexit() is the immediately following label. On 441 * ppc64 AIM trapexit() follows a nop. 442 */ 443 #ifdef __powerpc64__ 444 if ((long)(fp[2]) + 4 == (long)trapexit) { 445 #else 446 if ((long)(fp[1]) == (long)trapexit) { 447 #endif 448 /* 449 * In the case of powerpc, we will use the pointer to the regs 450 * structure that was pushed when we took the trap. To get this 451 * structure, we must increment beyond the frame structure. If the 452 * argument that we're seeking is passed on the stack, we'll pull 453 * the true stack pointer out of the saved registers and decrement 454 * our argument by the number of arguments passed in registers; if 455 * the argument we're seeking is passed in regsiters, we can just 456 * load it directly. 457 */ 458 #ifdef __powerpc64__ 459 struct reg *rp = (struct reg *)((uintptr_t)fp[0] + 48); 460 #else 461 struct reg *rp = (struct reg *)((uintptr_t)fp[0] + 8); 462 #endif 463 464 if (arg <= inreg) { 465 stack = &rp->fixreg[3]; 466 } else { 467 stack = (uintptr_t *)(rp->fixreg[1]); 468 arg -= inreg; 469 } 470 goto load; 471 } 472 473 } 474 475 /* 476 * We know that we did not come through a trap to get into 477 * dtrace_probe() -- the provider simply called dtrace_probe() 478 * directly. As this is the case, we need to shift the argument 479 * that we're looking for: the probe ID is the first argument to 480 * dtrace_probe(), so the argument n will actually be found where 481 * one would expect to find argument (n + 1). 482 */ 483 arg++; 484 485 if (arg <= inreg) { 486 /* 487 * This shouldn't happen. If the argument is passed in a 488 * register then it should have been, well, passed in a 489 * register... 490 */ 491 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 492 return (0); 493 } 494 495 arg -= (inreg + 1); 496 stack = fp + 2; 497 498 load: 499 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 500 val = stack[arg]; 501 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 502 503 return (val); 504 } 505 506 int 507 dtrace_getstackdepth(int aframes) 508 { 509 int depth = 0; 510 uintptr_t osp, sp; 511 vm_offset_t callpc; 512 513 osp = PAGE_SIZE; 514 sp = (uintptr_t)__builtin_frame_address(0); 515 for(;;) { 516 if (sp <= osp) 517 break; 518 519 if (!dtrace_sp_inkernel(sp)) 520 break; 521 522 depth++; 523 osp = sp; 524 dtrace_next_sp_pc(sp, &sp, NULL, NULL); 525 } 526 if (depth < aframes) 527 return (0); 528 529 return (depth - aframes); 530 } 531 532 ulong_t 533 dtrace_getreg(struct trapframe *frame, uint_t reg) 534 { 535 if (reg < 32) 536 return (frame->fixreg[reg]); 537 538 switch (reg) { 539 case 32: 540 return (frame->lr); 541 case 33: 542 return (frame->cr); 543 case 34: 544 return (frame->xer); 545 case 35: 546 return (frame->ctr); 547 case 36: 548 return (frame->srr0); 549 case 37: 550 return (frame->srr1); 551 case 38: 552 return (frame->exc); 553 default: 554 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 555 return (0); 556 } 557 } 558 559 static int 560 dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) 561 { 562 ASSERT(INKERNEL(kaddr) && kaddr + size >= kaddr); 563 564 if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) { 565 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 566 cpu_core[curcpu].cpuc_dtrace_illval = uaddr; 567 return (0); 568 } 569 570 return (1); 571 } 572 573 void 574 dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, 575 volatile uint16_t *flags) 576 { 577 if (dtrace_copycheck(uaddr, kaddr, size)) 578 if (copyin((const void *)uaddr, (void *)kaddr, size)) { 579 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 580 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 581 } 582 } 583 584 void 585 dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size, 586 volatile uint16_t *flags) 587 { 588 if (dtrace_copycheck(uaddr, kaddr, size)) { 589 if (copyout((const void *)kaddr, (void *)uaddr, size)) { 590 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 591 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 592 } 593 } 594 } 595 596 void 597 dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, 598 volatile uint16_t *flags) 599 { 600 size_t actual; 601 int error; 602 603 if (dtrace_copycheck(uaddr, kaddr, size)) { 604 error = copyinstr((const void *)uaddr, (void *)kaddr, 605 size, &actual); 606 607 /* ENAMETOOLONG is not a fault condition. */ 608 if (error && error != ENAMETOOLONG) { 609 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 610 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 611 } 612 } 613 } 614 615 /* 616 * The bulk of this function could be replaced to match dtrace_copyinstr() 617 * if we ever implement a copyoutstr(). 618 */ 619 void 620 dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size, 621 volatile uint16_t *flags) 622 { 623 size_t len; 624 625 if (dtrace_copycheck(uaddr, kaddr, size)) { 626 len = strlen((const char *)kaddr); 627 if (len > size) 628 len = size; 629 630 if (copyout((const void *)kaddr, (void *)uaddr, len)) { 631 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 632 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 633 } 634 } 635 } 636 637 uint8_t 638 dtrace_fuword8(void *uaddr) 639 { 640 if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { 641 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 642 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 643 return (0); 644 } 645 return (fubyte(uaddr)); 646 } 647 648 uint16_t 649 dtrace_fuword16(void *uaddr) 650 { 651 uint16_t ret = 0; 652 653 if (dtrace_copycheck((uintptr_t)uaddr, (uintptr_t)&ret, sizeof(ret))) { 654 if (copyin((const void *)uaddr, (void *)&ret, sizeof(ret))) { 655 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 656 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 657 } 658 } 659 return ret; 660 } 661 662 uint32_t 663 dtrace_fuword32(void *uaddr) 664 { 665 if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) { 666 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 667 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 668 return (0); 669 } 670 return (fuword32(uaddr)); 671 } 672 673 uint64_t 674 dtrace_fuword64(void *uaddr) 675 { 676 uint64_t ret = 0; 677 678 if (dtrace_copycheck((uintptr_t)uaddr, (uintptr_t)&ret, sizeof(ret))) { 679 if (copyin((const void *)uaddr, (void *)&ret, sizeof(ret))) { 680 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 681 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 682 } 683 } 684 return ret; 685 } 686 687 uintptr_t 688 dtrace_fulword(void *uaddr) 689 { 690 uintptr_t ret = 0; 691 692 if (dtrace_copycheck((uintptr_t)uaddr, (uintptr_t)&ret, sizeof(ret))) { 693 if (copyin((const void *)uaddr, (void *)&ret, sizeof(ret))) { 694 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 695 cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; 696 } 697 } 698 return ret; 699 } 700