/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
 *
 * $FreeBSD$
 *
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/cdefs.h>
#include <sys/param.h>

#include <sys/dtrace.h>

#include <machine/cpufunc.h>
#include <machine/md_var.h>

#include "fbt.h"

/*
 * Opcode bytes that fbt_provide_module_function() compares against the
 * function text when looking for a frame-pointer prologue.  The two
 * _V0/_V1 pairs are the alternative second and third bytes accepted after
 * FBT_PUSHL_EBP on the non-amd64 path.
 *
 * NOTE(review): FBT_REX_RSP_RBP is not referenced anywhere in this file.
 */
#define	FBT_PUSHL_EBP		0x55
#define	FBT_MOVL_ESP_EBP0_V0	0x8b
#define	FBT_MOVL_ESP_EBP1_V0	0xec
#define	FBT_MOVL_ESP_EBP0_V1	0x89
#define	FBT_MOVL_ESP_EBP1_V1	0xe5
#define	FBT_REX_RSP_RBP		0x48

/*
 * Opcode bytes that fbt_provide_module_function() compares against the
 * function text when looking for return sites.
 */
#define	FBT_POPL_EBP		0x5d
#define	FBT_RET			0xc3
#define	FBT_RET_IMM16		0xc2
#define	FBT_LEAVE		0xc9

/*
 * Value recorded in fbtp_patchval for every probe created in this file;
 * it differs between amd64 and other builds.
 */
#ifdef __amd64__
#define	FBT_PATCHVAL		0xcc
#else
#define	FBT_PATCHVAL		0xf0
#endif

/* Probe names passed to dtrace_probe_create(). */
#define	FBT_ENTRY	"entry"
#define	FBT_RETURN	"return"

/*
 * Fire every FBT probe registered at the patched address `addr`.
 *
 * The probe table bucket for `addr` is searched for a probe whose patch
 * point equals `addr`.  When one is found, that probe and every probe
 * chained to it through fbtp_tracenext is fired:
 *
 *  - probes with fbtp_roffset == 0 (the entry probes created below never
 *    set this field, so it keeps its zeroed value) are fired with five
 *    arguments taken from the trap frame registers (amd64) or from the
 *    interrupted stack (otherwise);
 *  - all other probes are fired with fbtp_roffset and `rval` as the first
 *    two arguments and zeroes for the rest.
 *
 * Returns the fbtp_rval recorded for the matching probe, or 0 when no
 * probe is registered at `addr`.
 */
int
fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval)
{
	solaris_cpu_t *cpu;
	uintptr_t *stack;
	uintptr_t arg0, arg1, arg2, arg3, arg4;
	fbt_probe_t *fbt;
	int8_t fbtrval;

#ifdef __amd64__
	stack = (uintptr_t *)frame->tf_rsp;
#else
	/*
	 * Skip hardware-saved registers.
	 *
	 * NOTE(review): confirm that three words is the right distance from
	 * tf_isp to the interrupted stack for the i386 trap frame layout.
	 */
	stack = (uintptr_t *)frame->tf_isp + 3;
#endif

	cpu = &solaris_cpu[curcpu];
	fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
		if ((uintptr_t)fbt->fbtp_patchpoint != addr)
			continue;
		/*
		 * All probes chained on this patch point are expected to
		 * share one rval; remember it now because the inner loop
		 * runs `fbt` to NULL before we return.
		 */
		fbtrval = fbt->fbtp_rval;
		for (; fbt != NULL; fbt = fbt->fbtp_tracenext) {
			ASSERT(fbt->fbtp_rval == fbtrval);
			if (fbt->fbtp_roffset == 0) {
#ifdef __amd64__
				/* fbt->fbtp_rval == DTRACE_INVOP_PUSHQ_RBP */
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				cpu->cpu_dtrace_caller = stack[0];
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
				    CPU_DTRACE_BADADDR);

				/* Arguments come from the saved registers. */
				arg0 = frame->tf_rdi;
				arg1 = frame->tf_rsi;
				arg2 = frame->tf_rdx;
				arg3 = frame->tf_rcx;
				arg4 = frame->tf_r8;
#else
				int i = 0;

				/*
				 * When accessing the arguments on the stack,
				 * we must protect against accessing beyond
				 * the stack.  We can safely set NOFAULT here
				 * -- we know that interrupts are already
				 * disabled.
				 */
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				/* stack[0] is the caller; args follow it. */
				cpu->cpu_dtrace_caller = stack[i++];
				arg0 = stack[i++];
				arg1 = stack[i++];
				arg2 = stack[i++];
				arg3 = stack[i++];
				arg4 = stack[i++];
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
				    CPU_DTRACE_BADADDR);
#endif

				dtrace_probe(fbt->fbtp_id, arg0, arg1,
				    arg2, arg3, arg4);

				cpu->cpu_dtrace_caller = 0;
			} else {
#ifdef __amd64__
				/*
				 * On amd64, we instrument the ret, not the
				 * leave.  We therefore need to set the caller
				 * to ensure that the top frame of a stack()
				 * action is correct.
				 */
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				cpu->cpu_dtrace_caller = stack[0];
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
				    CPU_DTRACE_BADADDR);
#endif

				dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset,
				    rval, 0, 0, 0);
				cpu->cpu_dtrace_caller = 0;
			}
		}
		return (fbtrval);
	}

	/* No probe is registered at this address. */
	return (0);
}

/*
 * Store `val` at the probe's patch point.
 *
 * The store is bracketed by intr_disable()/intr_restore() and by
 * disable_wp()/restore_wp(); the previous state returned by each
 * "disable" call is handed back to the matching "restore" call, in the
 * reverse order of acquisition.
 */
void
fbt_patch_tracepoint(fbt_probe_t *fbt, fbt_patchval_t val)
{
	register_t intr;
	bool old_wp;

	intr = intr_disable();
	old_wp = disable_wp();
	*fbt->fbtp_patchpoint = val;
	restore_wp(old_wp);
	intr_restore(intr);
}

/*
 * Create the FBT entry probe and any return probes for one function.
 *
 * lf       linker file the function belongs to; its fbt_nentries counter
 *          is incremented once per probe structure created here.
 * symindx  symbol index, recorded in each probe structure.
 * symval   symbol description; name, value (start address) and size are
 *          read.
 * opaque   module name string, passed to dtrace_probe_create().
 *
 * Nothing is created when fbt_excluded() rejects the name, when the name
 * is "trap_check", or when no acceptable frame-pointer prologue is found.
 * Otherwise an entry probe is created at the prologue's first byte and the
 * remainder of the function is scanned for return sites; the scan stops at
 * the end of the function or at the first instruction that
 * dtrace_instr_size() cannot size.
 *
 * Always returns 0.
 */
int
fbt_provide_module_function(linker_file_t lf, int symindx,
    linker_symval_t *symval, void *opaque)
{
	char *modname = opaque;
	const char *name = symval->name;
	fbt_probe_t *fbt, *hash, *retfbt;
	int j;
	int size;
	uint8_t *instr, *limit;

	if (fbt_excluded(name))
		return (0);

	/*
	 * trap_check() is a wrapper for DTrace's fault handler, so we don't
	 * want to be able to instrument it.
	 */
	if (strcmp(name, "trap_check") == 0)
		return (0);

	/*
	 * NOTE(review): this initial value appears never to be read; every
	 * later use of `size` follows an assignment from
	 * dtrace_instr_size().
	 */
	size = symval->size;

	instr = (uint8_t *) symval->value;
	limit = (uint8_t *) symval->value + symval->size;

#ifdef __amd64__
	/*
	 * Walk forward one instruction at a time until the frame-pointer
	 * push is found, the function ends, or disassembly fails.
	 */
	while (instr < limit) {
		if (*instr == FBT_PUSHL_EBP)
			break;

		if ((size = dtrace_instr_size(instr)) <= 0)
			break;

		instr += size;
	}

	if (instr >= limit || *instr != FBT_PUSHL_EBP) {
		/*
		 * We either don't save the frame pointer in this
		 * function, or we ran into some disassembly
		 * screw-up.  Either way, we bail.
		 */
		return (0);
	}
#else
	/*
	 * The function must begin with the frame-pointer push immediately
	 * followed by one of the two accepted two-byte sequences.
	 */
	if (instr[0] != FBT_PUSHL_EBP)
		return (0);

	if (!(instr[1] == FBT_MOVL_ESP_EBP0_V0 &&
	    instr[2] == FBT_MOVL_ESP_EBP1_V0) &&
	    !(instr[1] == FBT_MOVL_ESP_EBP0_V1 &&
	    instr[2] == FBT_MOVL_ESP_EBP1_V1))
		return (0);
#endif

	/*
	 * Entry probe.  The structure is zero-filled, so fbtp_roffset stays
	 * 0, which is how fbt_invop() recognizes an entry probe.
	 */
	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
	fbt->fbtp_name = name;
	fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
	    name, FBT_ENTRY, 3, fbt);
	fbt->fbtp_patchpoint = instr;
	fbt->fbtp_ctl = lf;
	fbt->fbtp_loadcnt = lf->loadcnt;
	fbt->fbtp_rval = DTRACE_INVOP_PUSHL_EBP;
	fbt->fbtp_savedval = *instr;
	fbt->fbtp_patchval = FBT_PATCHVAL;
	fbt->fbtp_symindx = symindx;

	/*
	 * If a probe already exists at this patch point, link the new probe
	 * behind it on the fbtp_tracenext chain; otherwise insert the new
	 * probe at the head of the hash bucket.
	 */
	for (hash = fbt_probetab[FBT_ADDR2NDX(instr)]; hash != NULL;
	    hash = hash->fbtp_hashnext) {
		if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
			fbt->fbtp_tracenext = hash->fbtp_tracenext;
			hash->fbtp_tracenext = fbt;
			break;
		}
	}
	if (hash == NULL) {
		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
		fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
	}

	lf->fbt_nentries++;

	/* retfbt tracks the most recently created return-site structure. */
	retfbt = NULL;
again:
	if (instr >= limit)
		return (0);

	/*
	 * If this disassembly fails, then we've likely walked off into
	 * a jump table or some other unsuitable area.  Bail out of the
	 * disassembly now.
	 */
	if ((size = dtrace_instr_size(instr)) <= 0)
		return (0);

#ifdef __amd64__
	/*
	 * We only instrument "ret" on amd64 -- we don't yet instrument
	 * ret imm16, largely because the compiler doesn't seem to
	 * (yet) emit them in the kernel...
	 */
	if (*instr != FBT_RET) {
		instr += size;
		goto again;
	}
#else
	/*
	 * Only a one-byte pop/leave that is directly followed by a ret or
	 * ret imm16 opcode byte qualifies as a return site.
	 */
	if (!(size == 1 &&
	    (*instr == FBT_POPL_EBP || *instr == FBT_LEAVE) &&
	    (*(instr + 1) == FBT_RET ||
	    *(instr + 1) == FBT_RET_IMM16))) {
		instr += size;
		goto again;
	}
#endif

	/*
	 * We (desperately) want to avoid erroneously instrumenting a
	 * jump table, especially given that our markers are pretty
	 * short:  two bytes on x86, and just one byte on amd64.  To
	 * determine if we're looking at a true instruction sequence
	 * or an inline jump table that happens to contain the same
	 * byte sequences, we resort to some heuristic sleaze:  we
	 * treat this instruction as being contained within a pointer,
	 * and see if that pointer points to within the body of the
	 * function.  If it does, we refuse to instrument it.
	 */
	for (j = 0; j < sizeof (uintptr_t); j++) {
		caddr_t check = (caddr_t) instr - j;
		uint8_t *ptr;

		/* Do not look at bytes before the start of the function. */
		if (check < symval->value)
			break;

		/* Skip windows that would read past the function's end. */
		if (check + sizeof (caddr_t) > (caddr_t)limit)
			continue;

		ptr = *(uint8_t **)check;

		if (ptr >= (uint8_t *) symval->value && ptr < limit) {
			instr += size;
			goto again;
		}
	}

	/*
	 * We have a winner!
	 */
	fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO);
	fbt->fbtp_name = name;

	/*
	 * The first return site creates the "return" probe; every later
	 * site reuses that probe id and is linked to its predecessor via
	 * fbtp_probenext.
	 */
	if (retfbt == NULL) {
		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
		    name, FBT_RETURN, 3, fbt);
	} else {
		retfbt->fbtp_probenext = fbt;
		fbt->fbtp_id = retfbt->fbtp_id;
	}

	retfbt = fbt;
	fbt->fbtp_patchpoint = instr;
	fbt->fbtp_ctl = lf;
	fbt->fbtp_loadcnt = lf->loadcnt;
	fbt->fbtp_symindx = symindx;

#ifndef __amd64__
	if (*instr == FBT_POPL_EBP) {
		fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
	} else {
		ASSERT(*instr == FBT_LEAVE);
		fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
	}
	/*
	 * Offset, from the start of the function, of the byte that follows
	 * the one-byte pop/leave being patched.
	 */
	fbt->fbtp_roffset =
	    (uintptr_t)(instr - (uint8_t *) symval->value) + 1;

#else
	ASSERT(*instr == FBT_RET);
	fbt->fbtp_rval = DTRACE_INVOP_RET;
	/* Offset of the ret itself from the start of the function. */
	fbt->fbtp_roffset =
	    (uintptr_t)(instr - (uint8_t *) symval->value);
#endif

	fbt->fbtp_savedval = *instr;
	fbt->fbtp_patchval = FBT_PATCHVAL;
	fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
	fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;

	lf->fbt_nentries++;

	/* Keep scanning: a function may contain several return sites. */
	instr += size;
	goto again;
}