/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/dtrace.h>
#include <sys/fasttrap.h>
#include <sys/x_call.h>
#include <sys/cmn_err.h>
#include <sys/trap.h>
#include <sys/psw.h>
#include <sys/privregs.h>
#include <sys/machsystm.h>
#include <vm/seg_kmem.h>

typedef struct dtrace_invop_hdlr {
	int (*dtih_func)(uintptr_t, uintptr_t *, uintptr_t);
	struct dtrace_invop_hdlr *dtih_next;
} dtrace_invop_hdlr_t;

dtrace_invop_hdlr_t *dtrace_invop_hdlr;

/*
 * Give each registered invalid-opcode handler a chance to claim the trap;
 * the first handler to return a non-zero value terminates the search.
 */
int
dtrace_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
{
	dtrace_invop_hdlr_t *hdlr;
	int rval;

	for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) {
		if ((rval = hdlr->dtih_func(addr, stack, eax)) != 0)
			return (rval);
	}

	return (0);
}

void
dtrace_invop_add(int (*func)(uintptr_t, uintptr_t *, uintptr_t))
{
	dtrace_invop_hdlr_t *hdlr;

	hdlr = kmem_alloc(sizeof (dtrace_invop_hdlr_t), KM_SLEEP);
	hdlr->dtih_func = func;
	hdlr->dtih_next = dtrace_invop_hdlr;
	dtrace_invop_hdlr = hdlr;
}

void
dtrace_invop_remove(int (*func)(uintptr_t, uintptr_t *, uintptr_t))
{
	dtrace_invop_hdlr_t *hdlr = dtrace_invop_hdlr, *prev = NULL;

	for (;;) {
		if (hdlr == NULL)
			panic("attempt to remove non-existent invop handler");

		if (hdlr->dtih_func == func)
			break;

		prev = hdlr;
		hdlr = hdlr->dtih_next;
	}

	if (prev == NULL) {
		ASSERT(dtrace_invop_hdlr == hdlr);
		dtrace_invop_hdlr = hdlr->dtih_next;
	} else {
		ASSERT(dtrace_invop_hdlr != hdlr);
		prev->dtih_next = hdlr->dtih_next;
	}

	kmem_free(hdlr, sizeof (dtrace_invop_hdlr_t));
}

int
dtrace_getipl(void)
{
	return (CPU->cpu_pri);
}

/*ARGSUSED*/
void
dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
{
#ifdef __amd64
	extern uintptr_t toxic_addr;
	extern size_t toxic_size;

	(*func)(0, _userlimit);

	if (hole_end > hole_start)
		(*func)(hole_start, hole_end);

	(*func)(toxic_addr, toxic_addr + toxic_size);
#else
	extern void *device_arena_contains(void *, size_t, size_t *);
	caddr_t vaddr;
	size_t len;

	for (vaddr = (caddr_t)kernelbase; vaddr < (caddr_t)KERNEL_TEXT;
	    vaddr += len) {
		len = (caddr_t)KERNEL_TEXT - vaddr;
		vaddr = device_arena_contains(vaddr, len, &len);
		if (vaddr == NULL)
			break;
		(*func)((uintptr_t)vaddr, (uintptr_t)vaddr + len);
	}

	(*func)(0, _userlimit);
#endif
}

static int
dtrace_xcall_func(dtrace_xcall_t func, void *arg)
{
	(*func)(arg);

	return (0);
}

/*ARGSUSED*/
void
dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
{
	cpuset_t set;

	CPUSET_ZERO(set);

	if (cpu == DTRACE_CPUALL) {
		CPUSET_ALL(set);
	} else {
		CPUSET_ADD(set, cpu);
	}

	kpreempt_disable();
	xc_sync((xc_arg_t)func, (xc_arg_t)arg, 0, CPUSET2BV(set),
	    (xc_func_t)dtrace_xcall_func);
	kpreempt_enable();
}

void
dtrace_sync_func(void)
{}

/*
 * Synchronously cross-call every CPU with an empty function. Because probes
 * fire with interrupts disabled, no CPU can service the cross-call while it
 * is in probe context; by the time dtrace_sync() returns, any probe
 * processing that was in flight has therefore completed.
 */
void
dtrace_sync(void)
{
	dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL);
}

int (*dtrace_pid_probe_ptr)(struct regs *);
int (*dtrace_return_probe_ptr)(struct regs *);

void
dtrace_user_probe(struct regs *rp, caddr_t addr, processorid_t cpuid)
{
	krwlock_t *rwp;
	proc_t *p = curproc;
	extern void trap(struct regs *, caddr_t, processorid_t);

	if (USERMODE(rp->r_cs) || (rp->r_ps & PS_VM)) {
		if (curthread->t_cred != p->p_cred) {
			cred_t *oldcred = curthread->t_cred;
			/*
			 * DTrace accesses t_cred in probe context. t_cred
			 * must always be either NULL, or point to a valid,
			 * allocated cred structure.
			 */
			curthread->t_cred = crgetcred();
			crfree(oldcred);
		}
	}

	if (rp->r_trapno == T_DTRACE_RET) {
		uint8_t step = curthread->t_dtrace_step;
		uint8_t ret = curthread->t_dtrace_ret;
		uintptr_t npc = curthread->t_dtrace_npc;

		if (curthread->t_dtrace_ast) {
			aston(curthread);
			curthread->t_sig_check = 1;
		}

		/*
		 * Clear all user tracing flags.
		 */
		curthread->t_dtrace_ft = 0;

		/*
		 * If we weren't expecting to take a return probe trap, kill
		 * the process as though it had just executed an unassigned
		 * trap instruction.
		 */
		if (step == 0) {
			tsignal(curthread, SIGILL);
			return;
		}

		/*
		 * If we hit this trap unrelated to a return probe, we're
		 * just here to reset the AST flag since we deferred a signal
		 * until after we logically single-stepped the instruction we
		 * copied out.
		 */
		if (ret == 0) {
			rp->r_pc = npc;
			return;
		}

		/*
		 * We need to wait until after we've called the
		 * dtrace_return_probe_ptr function pointer to set %pc.
		 */
		rwp = &CPU->cpu_ft_lock;
		rw_enter(rwp, RW_READER);
		if (dtrace_return_probe_ptr != NULL)
			(void) (*dtrace_return_probe_ptr)(rp);
		rw_exit(rwp);
		rp->r_pc = npc;

	} else if (rp->r_trapno == T_BPTFLT) {
		uint8_t instr, instr2;
		caddr_t linearpc;
		rwp = &CPU->cpu_ft_lock;

		/*
		 * The DTrace fasttrap provider uses the breakpoint trap
		 * (int 3). We let DTrace take the first crack at handling
		 * this trap; if it's not a probe that DTrace knows about,
		 * we call into the trap() routine to handle it like a
		 * breakpoint placed by a conventional debugger.
		 */
		rw_enter(rwp, RW_READER);
		if (dtrace_pid_probe_ptr != NULL &&
		    (*dtrace_pid_probe_ptr)(rp) == 0) {
			rw_exit(rwp);
			return;
		}
		rw_exit(rwp);

		if (dtrace_linear_pc(rp, p, &linearpc) != 0) {
			trap(rp, addr, cpuid);
			return;
		}

		/*
		 * If the instruction that caused the breakpoint trap doesn't
		 * look like an int 3 anymore, it may be that this tracepoint
		 * was removed just after the user thread executed it. In
		 * that case, return to user land to retry the instruction.
		 * Note that we assume the length of the instruction to retry
		 * is 1 byte because that's the length of FASTTRAP_INSTR.
		 * We check for r_pc > 0 and > 2 so that we don't have to
		 * deal with segment wraparound.
		 */
		if (rp->r_pc > 0 && fuword8(linearpc - 1, &instr) == 0 &&
		    instr != FASTTRAP_INSTR &&
		    (instr != 3 || (rp->r_pc >= 2 &&
		    (fuword8(linearpc - 2, &instr2) != 0 || instr2 != 0xCD)))) {
			rp->r_pc--;
			return;
		}

		trap(rp, addr, cpuid);

	} else {
		trap(rp, addr, cpuid);
	}
}

void
dtrace_safe_synchronous_signal(void)
{
	kthread_t *t = curthread;
	struct regs *rp = lwptoregs(ttolwp(t));
	size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;

	ASSERT(t->t_dtrace_on);

	/*
	 * If we're not in the range of scratch addresses, we're not actually
	 * tracing user instructions so turn off the flags. If the instruction
	 * we copied out caused a synchronous trap, reset the pc back to its
	 * original value and turn off the flags.
	 */
	if (rp->r_pc < t->t_dtrace_scrpc ||
	    rp->r_pc > t->t_dtrace_astpc + isz) {
		t->t_dtrace_ft = 0;
	} else if (rp->r_pc == t->t_dtrace_scrpc ||
	    rp->r_pc == t->t_dtrace_astpc) {
		rp->r_pc = t->t_dtrace_pc;
		t->t_dtrace_ft = 0;
	}
}

int
dtrace_safe_defer_signal(void)
{
	kthread_t *t = curthread;
	struct regs *rp = lwptoregs(ttolwp(t));
	size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;

	ASSERT(t->t_dtrace_on);

	/*
	 * If we're not in the range of scratch addresses, we're not actually
	 * tracing user instructions so turn off the flags.
	 */
	if (rp->r_pc < t->t_dtrace_scrpc ||
	    rp->r_pc > t->t_dtrace_astpc + isz) {
		t->t_dtrace_ft = 0;
		return (0);
	}

	/*
	 * If we've executed the original instruction, but we have performed
	 * neither the jmp back to t->t_dtrace_npc nor the clean up of any
	 * registers used to emulate %rip-relative instructions in 64-bit
	 * mode, do that here and take the signal right away. We detect this
	 * condition by seeing if the program counter is in the range
	 * [scrpc + isz, astpc).
	 */
	if (t->t_dtrace_astpc - rp->r_pc <
	    t->t_dtrace_astpc - t->t_dtrace_scrpc - isz) {
#ifdef __amd64
		/*
		 * If there is a scratch register and we're on the
		 * instruction immediately after the modified instruction,
		 * restore the value of that scratch register.
		 */
		if (t->t_dtrace_reg != 0 &&
		    rp->r_pc == t->t_dtrace_scrpc + isz) {
			switch (t->t_dtrace_reg) {
			case REG_RAX:
				rp->r_rax = t->t_dtrace_regv;
				break;
			case REG_RCX:
				rp->r_rcx = t->t_dtrace_regv;
				break;
			case REG_R8:
				rp->r_r8 = t->t_dtrace_regv;
				break;
			case REG_R9:
				rp->r_r9 = t->t_dtrace_regv;
				break;
			}
		}
#endif
		rp->r_pc = t->t_dtrace_npc;
		t->t_dtrace_ft = 0;
		return (0);
	}

	/*
	 * Otherwise, make sure we'll return to the kernel after executing
	 * the copied out instruction and defer the signal.
	 */
	if (!t->t_dtrace_step) {
		ASSERT(rp->r_pc < t->t_dtrace_astpc);
		rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc;
		t->t_dtrace_step = 1;
	}

	t->t_dtrace_ast = 1;

	return (1);
}

/*
 * Additional artificial frames for the machine type. For i86pc, we're already
 * accounted for, so return 0. On the hypervisor, we have an additional frame
 * (xen_callback_handler).
 */
int
dtrace_mach_aframes(void)
{
#ifdef __xpv
	return (1);
#else
	return (0);
#endif
}