/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2013, 2014 by Delphix. All rights reserved. * Copyright (c) 2017 Joyent, Inc. */ #include #include #include #include #include #include extern uintptr_t kernelbase; int dtrace_ustackdepth_max = 2048; void dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *intrpc) { struct frame *fp = (struct frame *)dtrace_getfp(); struct frame *nextfp, *minfp, *stacktop; int depth = 0; int on_intr, last = 0; uintptr_t pc; uintptr_t caller = CPU->cpu_dtrace_caller; if ((on_intr = CPU_ON_INTR(CPU)) != 0) stacktop = (struct frame *)(CPU->cpu_intr_stack + SA(MINFRAME)); else stacktop = (struct frame *)curthread->t_stk; minfp = fp; aframes++; if (intrpc != NULL && depth < pcstack_limit) pcstack[depth++] = (pc_t)intrpc; while (depth < pcstack_limit) { nextfp = (struct frame *)fp->fr_savfp; pc = fp->fr_savpc; if (nextfp <= minfp || nextfp >= stacktop) { if (on_intr) { /* * Hop from interrupt stack to thread stack. */ stacktop = (struct frame *)curthread->t_stk; minfp = (struct frame *)curthread->t_stkbase; on_intr = 0; continue; } /* * This is the last frame we can process; indicate * that we should return after processing this frame. */ last = 1; } if (aframes > 0) { if (--aframes == 0 && caller != 0) { /* * We've just run out of artificial frames, * and we have a valid caller -- fill it in * now. */ ASSERT(depth < pcstack_limit); pcstack[depth++] = (pc_t)caller; caller = 0; } } else { if (depth < pcstack_limit) pcstack[depth++] = (pc_t)pc; } if (last) { while (depth < pcstack_limit) pcstack[depth++] = 0; return; } fp = nextfp; minfp = fp; } } static int dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, uintptr_t sp) { klwp_t *lwp = ttolwp(curthread); proc_t *p = curproc; uintptr_t oldcontext = lwp->lwp_oldcontext; uintptr_t oldsp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; size_t s1, s2; int ret = 0; ASSERT(pcstack == NULL || pcstack_limit > 0); ASSERT(dtrace_ustackdepth_max > 0); if (p->p_model == DATAMODEL_NATIVE) { s1 = sizeof (struct frame) + 2 * sizeof (long); s2 = s1 + sizeof (siginfo_t); } else { s1 = sizeof (struct frame32) + 3 * sizeof (int); s2 = s1 + sizeof (siginfo32_t); } while (pc != 0) { /* * We limit the number of times we can go around this * loop to account for a circular stack. */ if (ret++ >= dtrace_ustackdepth_max) { *flags |= CPU_DTRACE_BADSTACK; cpu_core[CPU->cpu_id].cpuc_dtrace_illval = sp; break; } if (pcstack != NULL) { *pcstack++ = (uint64_t)pc; pcstack_limit--; if (pcstack_limit <= 0) break; } if (sp == 0) break; oldsp = sp; if (oldcontext == sp + s1 || oldcontext == sp + s2) { if (p->p_model == DATAMODEL_NATIVE) { ucontext_t *ucp = (ucontext_t *)oldcontext; greg_t *gregs = ucp->uc_mcontext.gregs; sp = dtrace_fulword(&gregs[REG_FP]); pc = dtrace_fulword(&gregs[REG_PC]); oldcontext = dtrace_fulword(&ucp->uc_link); } else { ucontext32_t *ucp = (ucontext32_t *)oldcontext; greg32_t *gregs = ucp->uc_mcontext.gregs; sp = dtrace_fuword32(&gregs[EBP]); pc = dtrace_fuword32(&gregs[EIP]); oldcontext = dtrace_fuword32(&ucp->uc_link); } } else { if (p->p_model == DATAMODEL_NATIVE) { struct frame *fr = (struct frame *)sp; pc = dtrace_fulword(&fr->fr_savpc); sp = dtrace_fulword(&fr->fr_savfp); } else { struct frame32 *fr = (struct frame32 *)sp; pc = dtrace_fuword32(&fr->fr_savpc); sp = dtrace_fuword32(&fr->fr_savfp); } } if (sp == oldsp) { *flags |= CPU_DTRACE_BADSTACK; cpu_core[CPU->cpu_id].cpuc_dtrace_illval = sp; break; } /* * This is totally bogus: if we faulted, we're going to clear * the fault and break. This is to deal with the apparently * broken Java stacks on x86. */ if (*flags & CPU_DTRACE_FAULT) { *flags &= ~CPU_DTRACE_FAULT; break; } } return (ret); } void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { klwp_t *lwp = ttolwp(curthread); proc_t *p = curproc; struct regs *rp; uintptr_t pc, sp; int n; ASSERT(DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT)); if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) return; if (pcstack_limit <= 0) return; /* * If there's no user context we still need to zero the stack. */ if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL) goto zero; *pcstack++ = (uint64_t)p->p_pid; pcstack_limit--; if (pcstack_limit <= 0) return; pc = rp->r_pc; sp = rp->r_fp; if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { *pcstack++ = (uint64_t)pc; pcstack_limit--; if (pcstack_limit <= 0) return; if (p->p_model == DATAMODEL_NATIVE) pc = dtrace_fulword((void *)rp->r_sp); else pc = dtrace_fuword32((void *)rp->r_sp); } n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp); ASSERT(n >= 0); ASSERT(n <= pcstack_limit); pcstack += n; pcstack_limit -= n; zero: while (pcstack_limit-- > 0) *pcstack++ = 0; } int dtrace_getustackdepth(void) { klwp_t *lwp = ttolwp(curthread); proc_t *p = curproc; struct regs *rp; uintptr_t pc, sp; int n = 0; if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL) return (0); if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) return (-1); pc = rp->r_pc; sp = rp->r_fp; if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { n++; if (p->p_model == DATAMODEL_NATIVE) pc = dtrace_fulword((void *)rp->r_sp); else pc = dtrace_fuword32((void *)rp->r_sp); } n += dtrace_getustack_common(NULL, 0, pc, sp); return (n); } void dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) { klwp_t *lwp = ttolwp(curthread); proc_t *p = curproc; struct regs *rp; uintptr_t pc, sp, oldcontext; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; size_t s1, s2; if (*flags & CPU_DTRACE_FAULT) return; if (pcstack_limit <= 0) return; /* * If there's no user context we still need to zero the stack. */ if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL) goto zero; *pcstack++ = (uint64_t)p->p_pid; pcstack_limit--; if (pcstack_limit <= 0) return; pc = rp->r_pc; sp = rp->r_fp; oldcontext = lwp->lwp_oldcontext; if (p->p_model == DATAMODEL_NATIVE) { s1 = sizeof (struct frame) + 2 * sizeof (long); s2 = s1 + sizeof (siginfo_t); } else { s1 = sizeof (struct frame32) + 3 * sizeof (int); s2 = s1 + sizeof (siginfo32_t); } if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { *pcstack++ = (uint64_t)pc; *fpstack++ = 0; pcstack_limit--; if (pcstack_limit <= 0) return; if (p->p_model == DATAMODEL_NATIVE) pc = dtrace_fulword((void *)rp->r_sp); else pc = dtrace_fuword32((void *)rp->r_sp); } while (pc != 0) { *pcstack++ = (uint64_t)pc; *fpstack++ = sp; pcstack_limit--; if (pcstack_limit <= 0) break; if (sp == 0) break; if (oldcontext == sp + s1 || oldcontext == sp + s2) { if (p->p_model == DATAMODEL_NATIVE) { ucontext_t *ucp = (ucontext_t *)oldcontext; greg_t *gregs = ucp->uc_mcontext.gregs; sp = dtrace_fulword(&gregs[REG_FP]); pc = dtrace_fulword(&gregs[REG_PC]); oldcontext = dtrace_fulword(&ucp->uc_link); } else { ucontext_t *ucp = (ucontext_t *)oldcontext; greg_t *gregs = ucp->uc_mcontext.gregs; sp = dtrace_fuword32(&gregs[EBP]); pc = dtrace_fuword32(&gregs[EIP]); oldcontext = dtrace_fuword32(&ucp->uc_link); } } else { if (p->p_model == DATAMODEL_NATIVE) { struct frame *fr = (struct frame *)sp; pc = dtrace_fulword(&fr->fr_savpc); sp = dtrace_fulword(&fr->fr_savfp); } else { struct frame32 *fr = (struct frame32 *)sp; pc = dtrace_fuword32(&fr->fr_savpc); sp = dtrace_fuword32(&fr->fr_savfp); } } /* * This is totally bogus: if we faulted, we're going to clear * the fault and break. This is to deal with the apparently * broken Java stacks on x86. */ if (*flags & CPU_DTRACE_FAULT) { *flags &= ~CPU_DTRACE_FAULT; break; } } zero: while (pcstack_limit-- > 0) *pcstack++ = 0; } /*ARGSUSED*/ uint64_t dtrace_getarg(int arg, int aframes) { uintptr_t val; struct frame *fp = (struct frame *)dtrace_getfp(); uintptr_t *stack; int i; #if defined(__amd64) /* * A total of 6 arguments are passed via registers; any argument with * index of 5 or lower is therefore in a register. */ int inreg = 5; #endif for (i = 1; i <= aframes; i++) { fp = (struct frame *)(fp->fr_savfp); if (fp->fr_savpc == (pc_t)dtrace_invop_callsite) { #if !defined(__amd64) /* * If we pass through the invalid op handler, we will * use the pointer that it passed to the stack as the * second argument to dtrace_invop() as the pointer to * the stack. When using this stack, we must step * beyond the EIP that was pushed when the trap was * taken -- hence the "+ 1" below. */ stack = ((uintptr_t **)&fp[1])[1] + 1; #else /* * In the case of amd64, we will use the pointer to the * regs structure that was pushed when we took the * trap. To get this structure, we must increment * beyond the frame structure, the calling RIP, and * padding stored in dtrace_invop(). If the argument * that we're seeking is passed on the stack, we'll * pull the true stack pointer out of the saved * registers and decrement our argument by the number * of arguments passed in registers; if the argument * we're seeking is passed in regsiters, we can just * load it directly. */ struct regs *rp = (struct regs *)((uintptr_t)&fp[1] + sizeof (uintptr_t) * 2); if (arg <= inreg) { stack = (uintptr_t *)&rp->r_rdi; } else { stack = (uintptr_t *)(rp->r_rsp); arg -= inreg; } #endif goto load; } } /* * We know that we did not come through a trap to get into * dtrace_probe() -- the provider simply called dtrace_probe() * directly. As this is the case, we need to shift the argument * that we're looking for: the probe ID is the first argument to * dtrace_probe(), so the argument n will actually be found where * one would expect to find argument (n + 1). */ arg++; #if defined(__amd64) if (arg <= inreg) { /* * This shouldn't happen. If the argument is passed in a * register then it should have been, well, passed in a * register... */ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } arg -= (inreg + 1); #endif stack = (uintptr_t *)&fp[1]; load: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); val = stack[arg]; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); return (val); } /*ARGSUSED*/ int dtrace_getstackdepth(int aframes) { struct frame *fp = (struct frame *)dtrace_getfp(); struct frame *nextfp, *minfp, *stacktop; int depth = 0; int on_intr; if ((on_intr = CPU_ON_INTR(CPU)) != 0) stacktop = (struct frame *)(CPU->cpu_intr_stack + SA(MINFRAME)); else stacktop = (struct frame *)curthread->t_stk; minfp = fp; aframes++; for (;;) { depth++; nextfp = (struct frame *)fp->fr_savfp; if (nextfp <= minfp || nextfp >= stacktop) { if (on_intr) { /* * Hop from interrupt stack to thread stack. */ stacktop = (struct frame *)curthread->t_stk; minfp = (struct frame *)curthread->t_stkbase; on_intr = 0; continue; } break; } fp = nextfp; minfp = fp; } if (depth <= aframes) return (0); return (depth - aframes); } #if defined(__amd64) static const int dtrace_regmap[] = { REG_GS, /* GS */ REG_FS, /* FS */ REG_ES, /* ES */ REG_DS, /* DS */ REG_RDI, /* EDI */ REG_RSI, /* ESI */ REG_RBP, /* EBP */ REG_RSP, /* ESP */ REG_RBX, /* EBX */ REG_RDX, /* EDX */ REG_RCX, /* ECX */ REG_RAX, /* EAX */ REG_TRAPNO, /* TRAPNO */ REG_ERR, /* ERR */ REG_RIP, /* EIP */ REG_CS, /* CS */ REG_RFL, /* EFL */ REG_RSP, /* UESP */ REG_SS /* SS */ }; #endif ulong_t dtrace_getreg(struct regs *rp, uint_t reg) { #if defined(__amd64) if (reg <= SS) { if (reg >= sizeof (dtrace_regmap) / sizeof (int)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } reg = dtrace_regmap[reg]; } else { reg -= SS + 1; } switch (reg) { case REG_RDI: return (rp->r_rdi); case REG_RSI: return (rp->r_rsi); case REG_RDX: return (rp->r_rdx); case REG_RCX: return (rp->r_rcx); case REG_R8: return (rp->r_r8); case REG_R9: return (rp->r_r9); case REG_RAX: return (rp->r_rax); case REG_RBX: return (rp->r_rbx); case REG_RBP: return (rp->r_rbp); case REG_R10: return (rp->r_r10); case REG_R11: return (rp->r_r11); case REG_R12: return (rp->r_r12); case REG_R13: return (rp->r_r13); case REG_R14: return (rp->r_r14); case REG_R15: return (rp->r_r15); case REG_DS: return (rp->r_ds); case REG_ES: return (rp->r_es); case REG_FS: return (rp->r_fs); case REG_GS: return (rp->r_gs); case REG_TRAPNO: return (rp->r_trapno); case REG_ERR: return (rp->r_err); case REG_RIP: return (rp->r_rip); case REG_CS: return (rp->r_cs); case REG_SS: return (rp->r_ss); case REG_RFL: return (rp->r_rfl); case REG_RSP: return (rp->r_rsp); default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } #else if (reg > SS) { DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } return ((&rp->r_gs)[reg]); #endif } void dtrace_setreg(struct regs *rp, uint_t reg, ulong_t val) { #if defined(__amd64) if (reg <= SS) { ASSERT(reg < (sizeof (dtrace_regmap) / sizeof (int))); reg = dtrace_regmap[reg]; } else { reg -= SS + 1; } switch (reg) { case REG_RDI: rp->r_rdi = val; break; case REG_RSI: rp->r_rsi = val; break; case REG_RDX: rp->r_rdx = val; break; case REG_RCX: rp->r_rcx = val; break; case REG_R8: rp->r_r8 = val; break; case REG_R9: rp->r_r9 = val; break; case REG_RAX: rp->r_rax = val; break; case REG_RBX: rp->r_rbx = val; break; case REG_RBP: rp->r_rbp = val; break; case REG_R10: rp->r_r10 = val; break; case REG_R11: rp->r_r11 = val; break; case REG_R12: rp->r_r12 = val; break; case REG_R13: rp->r_r13 = val; break; case REG_R14: rp->r_r14 = val; break; case REG_R15: rp->r_r15 = val; break; case REG_RSP: rp->r_rsp = val; break; default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return; } #else /* defined(__amd64) */ switch (reg) { case EAX: rp->r_eax = val; break; case ECX: rp->r_ecx = val; break; case EDX: rp->r_edx = val; break; case EBX: rp->r_ebx = val; break; case ESP: rp->r_esp = val; break; case EBP: rp->r_ebp = val; break; case ESI: rp->r_esi = val; break; case EDI: rp->r_edi = val; break; default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return; } #endif /* defined(__amd64) */ } static int dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) { ASSERT(kaddr >= kernelbase && kaddr + size >= kaddr); if (uaddr + size >= kernelbase || uaddr + size < uaddr) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[CPU->cpu_id].cpuc_dtrace_illval = uaddr; return (0); } return (1); } /*ARGSUSED*/ void dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copy(uaddr, kaddr, size); } /*ARGSUSED*/ void dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copy(kaddr, uaddr, size); } void dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copystr(uaddr, kaddr, size, flags); } void dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size, volatile uint16_t *flags) { if (dtrace_copycheck(uaddr, kaddr, size)) dtrace_copystr(kaddr, uaddr, size, flags); } uint8_t dtrace_fuword8(void *uaddr) { extern uint8_t dtrace_fuword8_nocheck(void *); if ((uintptr_t)uaddr >= _userlimit) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword8_nocheck(uaddr)); } uint16_t dtrace_fuword16(void *uaddr) { extern uint16_t dtrace_fuword16_nocheck(void *); if ((uintptr_t)uaddr >= _userlimit) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword16_nocheck(uaddr)); } uint32_t dtrace_fuword32(void *uaddr) { extern uint32_t dtrace_fuword32_nocheck(void *); if ((uintptr_t)uaddr >= _userlimit) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword32_nocheck(uaddr)); } uint64_t dtrace_fuword64(void *uaddr) { extern uint64_t dtrace_fuword64_nocheck(void *); if ((uintptr_t)uaddr >= _userlimit) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (uintptr_t)uaddr; return (0); } return (dtrace_fuword64_nocheck(uaddr)); }