/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/fasttrap_isa.h>
#include <sys/fasttrap_impl.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <sys/cmn_err.h>
#include <sys/frame.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <sys/trap.h>

#include <v9/sys/machpcb.h>
#include <v9/sys/privregs.h>

/*
 * Lossless User-Land Tracing on SPARC
 * -----------------------------------
 *
 * The Basic Idea
 *
 * The most important design constraint is, of course, correct execution of
 * the user thread above all else. The next most important goal is rapid
 * execution. We combine execution of instructions in user-land with
 * emulation of certain instructions in the kernel to aim for complete
 * correctness and maximal performance.
 *
 * We take advantage of the split PC/NPC architecture to speed up logical
 * single-stepping; when we copy an instruction out to the scratch space in
 * the ulwp_t structure (held in the %g7 register on SPARC), we can
 * effectively single step by setting the PC to our scratch space and leaving
 * the NPC alone. This executes the replaced instruction and then continues
 * on without having to reenter the kernel as with single-stepping. The
 * obvious caveat is for instructions whose execution is PC dependent --
 * branches, call and link instructions (call and jmpl), and the rdpc
 * instruction. These instructions cannot be executed in the manner described
 * so they must be emulated in the kernel.
 *
 * Emulation for this small set of instructions is fairly simple; the most
 * difficult part is emulating branch conditions.
 *
 *
 * A Cache Heavy Portfolio
 *
 * It's important to note at this time that copying an instruction out to the
 * ulwp_t scratch space in user-land is rather complicated. SPARC has
 * separate data and instruction caches so any writes to the D$ (using a
 * store instruction for example) aren't necessarily reflected in the I$.
 * The flush instruction can be used to synchronize the two and must be used
 * for any self-modifying code, but the flush instruction only applies to the
 * primary address space (the absence of a flusha analogue to the flush
 * instruction that accepts an ASI argument is an obvious omission from SPARC
 * v9 where the notion of the alternate address space was introduced on
 * SPARC).
 * To correctly copy out the instruction, we must use a block store
 * that doesn't allocate in the D$ and ensures synchronization with the I$;
 * see dtrace_blksuword32() for the implementation (this function uses
 * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
 * described). Refer to the UltraSPARC I/II manual for details on the
 * ASI_BLK_COMMIT_S ASI.
 *
 *
 * Return Subtleties
 *
 * When we're firing a return probe we need to expose the value returned by
 * the function being traced. Since the function can set the return value
 * in its last instruction, we need to fire the return probe only _after_
 * the effects of the instruction are apparent. For instructions that we
 * emulate, we can call dtrace_probe() after we've performed the emulation;
 * for instructions that we execute after we return to user-land, we set
 * %pc to the instruction we copied out (as described above) and set %npc
 * to a trap instruction stashed in the ulwp_t structure. After the traced
 * instruction is executed, the trap instruction returns control to the
 * kernel where we can fire the return probe.
 *
 * This need for a second trap in cases where we execute the traced
 * instruction makes it all the more important to emulate the most common
 * instructions to avoid the second trip in and out of the kernel.
 *
 *
 * Making it Fast
 *
 * Since copying out an instruction is neither simple nor inexpensive for the
 * CPU, we should attempt to avoid doing it in as many cases as possible.
 * Since function entry and return are usually the most interesting probe
 * sites, we attempt to tune the performance of the fasttrap provider around
 * instructions typically in those places.
 *
 * Looking at a bunch of functions in libraries and executables reveals that
 * most functions begin with either a save or a sethi (to set up a larger
 * argument to the save) and end with a restore or an or (in the case of leaf
 * functions). To try to improve performance, we emulate all of these
 * instructions in the kernel.
 *
 * The save and restore instructions are a little tricky since they perform
 * register window manipulation. Rather than trying to tinker with the
 * register windows from the kernel, we emulate the implicit add that takes
 * place as part of those instructions and set the %pc to point to a simple
 * save or restore we've hidden in the ulwp_t structure. If we're in a return
 * probe and want to make it seem as though the tracepoint has been completely
 * executed, we need to remember that we've pulled this trick with restore and
 * pull registers from the previous window (the one that we'll switch to once
 * the simple restore instruction is executed) rather than the current one.
 * This is why, in the case of emulating a restore, we set the DTrace CPU flag
 * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
 * (see fasttrap_return_common()).
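 *
 * As a concrete sketch of the trick described above (the instruction here
 * is hypothetical, but the mechanism is the one implemented in
 * fasttrap_pid_probe()): for a traced "restore %i1, 0x20, %o0" we compute
 * %i1 + 0x20 in the kernel, store the result in %i0 -- the register that
 * will become %o0 once the stashed "restore %g0, %g0, %g0" actually
 * executes -- and then point %pc at that stashed restore.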
 */

#define	OP(x)		((x) >> 30)
#define	OP2(x)		(((x) >> 22) & 0x07)
#define	OP3(x)		(((x) >> 19) & 0x3f)
#define	RCOND(x)	(((x) >> 25) & 0x07)
#define	COND(x)		(((x) >> 25) & 0x0f)
#define	A(x)		(((x) >> 29) & 0x01)
#define	I(x)		(((x) >> 13) & 0x01)
#define	RD(x)		(((x) >> 25) & 0x1f)
#define	RS1(x)		(((x) >> 14) & 0x1f)
#define	RS2(x)		(((x) >> 0) & 0x1f)
#define	CC(x)		(((x) >> 20) & 0x03)
#define	DISP16(x)	((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
#define	DISP22(x)	((x) & 0x3fffff)
#define	DISP19(x)	((x) & 0x7ffff)
#define	DISP30(x)	((x) & 0x3fffffff)
#define	SW_TRAP(x)	((x) & 0x7f)

#define	OP3_OR		0x02
#define	OP3_RD		0x28
#define	OP3_JMPL	0x38
#define	OP3_RETURN	0x39
#define	OP3_TCC		0x3a
#define	OP3_SAVE	0x3c
#define	OP3_RESTORE	0x3d

#define	OP3_PREFETCH	0x2d
#define	OP3_CASA	0x3c
#define	OP3_PREFETCHA	0x3d
#define	OP3_CASXA	0x3e

#define	OP2_ILLTRAP	0x0
#define	OP2_BPcc	0x1
#define	OP2_Bicc	0x2
#define	OP2_BPr		0x3
#define	OP2_SETHI	0x4
#define	OP2_FBPfcc	0x5
#define	OP2_FBfcc	0x6

#define	R_G0		0
#define	R_O0		8
#define	R_SP		14
#define	R_I0		24
#define	R_I1		25
#define	R_I2		26
#define	R_I3		27
#define	R_I4		28

/*
 * Check the comment in fasttrap.h when changing these offsets or adding
 * new instructions.
 */
#define	FASTTRAP_OFF_SAVE	64
#define	FASTTRAP_OFF_RESTORE	68
#define	FASTTRAP_OFF_FTRET	72
#define	FASTTRAP_OFF_RETURN	76

#define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */

/*
 * Tunable to let users turn off the fancy save instruction optimization.
 * If a program is non-ABI compliant, there's a possibility that the save
 * instruction optimization could cause an error.
 */
int fasttrap_optimize_save = 1;

static uint64_t
fasttrap_anarg(struct regs *rp, int argno)
{
	uint64_t value;

	if (argno < 6)
		return ((&rp->r_o0)[argno]);

	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fulword(&fr->fr_argd[argno]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
		    CPU_DTRACE_BADALIGN);
	} else {
		struct frame32 *fr = (struct frame32 *)rp->r_sp;

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fuword32(&fr->fr_argd[argno]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
		    CPU_DTRACE_BADALIGN);
	}

	return (value);
}

static ulong_t fasttrap_getreg(struct regs *, uint_t);
static void fasttrap_putreg(struct regs *, uint_t, ulong_t);

static void
fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
    uint_t fake_restore, int argc, uintptr_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);
	int inc = (fake_restore ? 16 : 0);

	/*
	 * The only way we'll hit the fake_restore case is if a USDT probe is
	 * invoked as a tail-call. While it wouldn't be incorrect, we can
	 * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
	 * directly since a tail-call can't be made if the invoked function
	 * would use the argument dump space (i.e. if there were more than
	 * 6 arguments). We take this shortcut because unconditionally rooting
	 * around for R_FP (R_SP + 16) would be unnecessarily painful.
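	 *
	 * To make the inc offset concrete: with fake_restore set, argument
	 * x (for x < 6) is read from register R_O0 + x + 16, i.e. from
	 * %i0..%i5 (registers 24-29) rather than %o0..%o5 (registers 8-13),
	 * since the values that will land in the %o's once the stashed
	 * restore executes are still sitting in the %i's.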
	 */

	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		uintptr_t v;

		for (i = 0; i < cap; i++) {
			x = probe->ftp_argmap[i];

			if (x < 6)
				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
			else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
				argv[i] = 0;
		}

	} else {
		struct frame32 *fr = (struct frame32 *)rp->r_sp;
		uint32_t v;

		for (i = 0; i < cap; i++) {
			x = probe->ftp_argmap[i];

			if (x < 6)
				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
			else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
				argv[i] = 0;
		}
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}

static void
fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
    uint_t fake_restore)
{
	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
	kmutex_t *pid_mtx;
	dtrace_icookie_t cookie;

	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
	 */
	if (tp == NULL || tp->ftt_retids == NULL) {
		mutex_exit(pid_mtx);
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;

		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
			if (probe->ftp_argmap != NULL && fake_restore) {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp, fake_restore,
				    sizeof (t) / sizeof (t[0]), t);

				cookie = dtrace_interrupt_disable();
				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
				dtrace_interrupt_enable(cookie);

			} else if (probe->ftp_argmap != NULL) {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp, fake_restore,
				    sizeof (t) / sizeof (t[0]), t);

				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);

			} else if (fake_restore) {
				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);

				cookie = dtrace_interrupt_disable();
				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
				dtrace_probe(probe->ftp_id, arg0, arg1,
				    arg2, arg3, arg4);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
				dtrace_interrupt_enable(cookie);

			} else {
				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
				    rp->r_o2, rp->r_o3, rp->r_o4);
			}

			continue;
		}

		/*
		 * If this is only a possible return point, we must
		 * be looking at a potential tail call in leaf context.
		 * If the %npc is still within this function, then we
		 * must have misidentified a jmpl as a tail-call when it
		 * is, in fact, part of a jump table. It would be nice to
		 * remove this tracepoint, but this is neither the time
		 * nor the place.
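		 * (A jmpl used as a jump table entry transfers control
		 * within the current function, so %npc stays inside
		 * [ftp_faddr, ftp_faddr + ftp_fsize); a genuine tail call
		 * leaves that range, which is what the check below relies
		 * on.)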
		 */
		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * It's possible for a function to branch to the delay slot
		 * of an instruction that we've identified as a return site.
		 * We can detect this spurious return probe activation by
		 * observing that in this case %npc will be %pc + 4 and %npc
		 * will be inside the current function (unless the user is
		 * doing _crazy_ instruction picking in which case there's
		 * very little we can do). The second check is important
		 * in case the last instructions of a function make a tail-
		 * call to the function located immediately after it.
		 */
		if (rp->r_npc == rp->r_pc + 4 &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * The first argument is the offset of the return tracepoint
		 * in the function; the remaining arguments are the return
		 * values.
		 *
		 * If fake_restore is set, we need to pull the return values
		 * out of the %i's rather than the %o's -- a little trickier.
		 */
		if (!fake_restore) {
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
		} else {
			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);

			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    arg0, arg1, arg2, arg3);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
			dtrace_interrupt_enable(cookie);
		}
	}

	mutex_exit(pid_mtx);
}

int
fasttrap_pid_probe(struct regs *rp)
{
	proc_t *p = curproc;
	fasttrap_tracepoint_t *tp, tp_local;
	fasttrap_id_t *id;
	pid_t pid;
	uintptr_t pc = rp->r_pc;
	uintptr_t npc = rp->r_npc;
	uintptr_t orig_pc = pc;
	fasttrap_bucket_t *bucket;
	kmutex_t *pid_mtx;
	uint_t fake_restore = 0, is_enabled = 0;
	dtrace_icookie_t cookie;

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (curthread->t_dtrace_step) {
		ASSERT(curthread->t_dtrace_on);
		fasttrap_sigtrap(p, curthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	curthread->t_dtrace_ft = 0;
	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	pid = p->p_pid;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Look up the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
		mutex_exit(pid_mtx);
		return (-1);
	}

	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;
		int isentry = (id->fti_ptype == DTFTP_ENTRY);

		if (id->fti_ptype == DTFTP_IS_ENABLED) {
			is_enabled = 1;
			continue;
		}

		/*
		 * We note that this was an entry probe to help ustack() find
		 * the first caller.
		 */
		if (isentry) {
			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
		}
		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
		    rp->r_o3, rp->r_o4);
		if (isentry) {
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
			dtrace_interrupt_enable(cookie);
		}
	}

	/*
	 * We're about to do a bunch of work so we cache a local copy of
	 * the tracepoint to emulate the instruction, and then find the
	 * tracepoint again later if we need to light up any return probes.
	 */
	tp_local = *tp;
	mutex_exit(pid_mtx);
	tp = &tp_local;

	/*
	 * If there's an is-enabled probe connected to this tracepoint it
	 * means that there was a 'mov %g0, %o0' instruction that was placed
	 * there by DTrace when the binary was linked. As this probe is, in
	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
	 * bypass all the instruction emulation logic since we know the
	 * inevitable result. It's possible that a user could construct a
	 * scenario where the 'is-enabled' probe was on some other
	 * instruction, but that would be a rather exotic way to shoot oneself
	 * in the foot.
	 */
	if (is_enabled) {
		rp->r_o0 = 1;
		pc = rp->r_npc;
		npc = pc + 4;
		goto done;
	}

	/*
	 * We emulate certain types of instructions to ensure correctness
	 * (in the case of position-dependent instructions) or to optimize
	 * common cases. The rest we have the thread execute back in user-
	 * land.
	 */
	switch (tp->ftt_type) {
	case FASTTRAP_T_SAVE:
	{
		int32_t imm;

		/*
		 * This is an optimization to let us handle function entry
		 * probes more efficiently. Many functions begin with a save
		 * instruction that follows the pattern:
		 *	save	%sp, <imm>, %sp
		 *
		 * Meanwhile, we've stashed the instruction:
		 *	save	%g1, %g0, %sp
		 *
		 * off of %g7, so all we have to do is stick the right value
		 * into %g1 and reset %pc to point to the instruction we've
		 * cleverly hidden (%npc should not be touched).
		 */

		imm = tp->ftt_instr << 19;
		imm >>= 19;
		rp->r_g1 = rp->r_sp + imm;
		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
		break;
	}

	case FASTTRAP_T_RESTORE:
	{
		ulong_t value;
		uint_t rd;

		/*
		 * This is an optimization to let us handle function
		 * return probes more efficiently.
		 * Most non-leaf functions end with the sequence:
		 *	ret
		 *	restore	<reg>, <reg_or_imm>, %oX
		 *
		 * We've stashed the instruction:
		 *	restore	%g0, %g0, %g0
		 *
		 * off of %g7 so we just need to place the correct value
		 * in the right %i register (since after our fake-o
		 * restore, the %i's will become the %o's) and set the %pc
		 * to point to our hidden restore. We also set fake_restore to
		 * let fasttrap_return_common() know that it will find the
		 * return values in the %i's rather than the %o's.
		 */

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		/*
		 * Convert %o's to %i's; leave %g's as they are.
		 */
		rd = RD(tp->ftt_instr);
		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);

		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_RETURN:
	{
		uintptr_t target;

		/*
		 * A return instruction is like a jmpl (without the link
		 * part) that executes an implicit restore. We've stashed
		 * the instruction:
		 *	return	%o0
		 *
		 * off of %g7 so we just need to place the target in %o0
		 * and set the %pc to point to the stashed return instruction.
		 * We use %o0 since that register disappears after the return
		 * executes, erasing any evidence of this tampering.
		 */
		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, R_O0, target);

		pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_OR:
	{
		ulong_t value;

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, RD(tp->ftt_instr), value);
		pc = rp->r_npc;
		npc = pc + 4;
		break;
	}

	case FASTTRAP_T_SETHI:
		if (RD(tp->ftt_instr) != R_G0) {
			uint32_t imm32 = tp->ftt_instr << 10;
			fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
		}
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CCR:
	{
		uint_t c, v, z, n, taken;
		uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;

		if (tp->ftt_cc != 0)
			ccr >>= 4;

		c = (ccr >> 0) & 1;
		v = (ccr >> 1) & 1;
		z = (ccr >> 2) & 1;
		n = (ccr >> 3) & 1;

		switch (tp->ftt_code) {
		case 0x0:	/* BN */
			taken = 0;		break;
		case 0x1:	/* BE */
			taken = z;		break;
		case 0x2:	/* BLE */
			taken = z | (n ^ v);	break;
		case 0x3:	/* BL */
			taken = n ^ v;		break;
		case 0x4:	/* BLEU */
			taken = c | z;		break;
		case 0x5:	/* BCS (BLU) */
			taken = c;		break;
		case 0x6:	/* BNEG */
			taken = n;		break;
		case 0x7:	/* BVS */
			taken = v;		break;
		case 0x8:	/* BA */
			/*
			 * We handle the BA case differently since the annul
			 * bit means something slightly different.
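			 * (fasttrap_tracepoint_init() decodes branch-always
			 * instructions as FASTTRAP_T_ALWAYS rather than
			 * FASTTRAP_T_CCR, so reaching this case means the
			 * decoder and the emulator have fallen out of sync.)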
			 */
			panic("fasttrap: mishandled a branch");
			taken = 1;		break;
		case 0x9:	/* BNE */
			taken = ~z;		break;
		case 0xa:	/* BG */
			taken = ~(z | (n ^ v));	break;
		case 0xb:	/* BGE */
			taken = ~(n ^ v);	break;
		case 0xc:	/* BGU */
			taken = ~(c | z);	break;
		case 0xd:	/* BCC (BGEU) */
			taken = ~c;		break;
		case 0xe:	/* BPOS */
			taken = ~n;		break;
		case 0xf:	/* BVC */
			taken = ~v;		break;
		}

		if (taken & 1) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_FCC:
	{
		uint_t fcc;
		uint_t taken;
		uint64_t fsr;

		dtrace_getfsr(&fsr);

		if (tp->ftt_cc == 0) {
			fcc = (fsr >> 10) & 0x3;
		} else {
			uint_t shift;
			ASSERT(tp->ftt_cc <= 3);
			shift = 30 + tp->ftt_cc * 2;
			fcc = (fsr >> shift) & 0x3;
		}

		switch (tp->ftt_code) {
		case 0x0:	/* FBN */
			taken = (1 << fcc) & (0|0|0|0);	break;
		case 0x1:	/* FBNE */
			taken = (1 << fcc) & (8|4|2|0);	break;
		case 0x2:	/* FBLG */
			taken = (1 << fcc) & (0|4|2|0);	break;
		case 0x3:	/* FBUL */
			taken = (1 << fcc) & (8|0|2|0);	break;
		case 0x4:	/* FBL */
			taken = (1 << fcc) & (0|0|2|0);	break;
		case 0x5:	/* FBUG */
			taken = (1 << fcc) & (8|4|0|0);	break;
		case 0x6:	/* FBG */
			taken = (1 << fcc) & (0|4|0|0);	break;
		case 0x7:	/* FBU */
			taken = (1 << fcc) & (8|0|0|0);	break;
		case 0x8:	/* FBA */
			/*
			 * We handle the FBA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = (1 << fcc) & (8|4|2|1);	break;
		case 0x9:	/* FBE */
			taken = (1 << fcc) & (0|0|0|1);	break;
		case 0xa:	/* FBUE */
			taken = (1 << fcc) & (8|0|0|1);	break;
		case 0xb:	/* FBGE */
			taken = (1 << fcc) & (0|4|0|1);	break;
		case 0xc:	/* FBUGE */
			taken = (1 << fcc) & (8|4|0|1);	break;
		case 0xd:	/* FBLE */
			taken = (1 << fcc) & (0|0|2|1);	break;
		case 0xe:	/* FBULE */
			taken = (1 << fcc) & (8|0|2|1);	break;
		case 0xf:	/* FBO */
			taken = (1 << fcc) & (0|4|2|1);	break;
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_REG:
	{
		int64_t value;
		uint_t taken;
		uint_t reg = RS1(tp->ftt_instr);

		/*
		 * An ILP32 process shouldn't be using a branch predicated on
		 * an %i or an %l since it would violate the ABI. It's a
		 * violation of the ABI because we can't ensure deterministic
		 * behavior. We should have identified this case when we
		 * enabled the probe.
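		 * (fasttrap_tracepoint_init() refuses to instrument an
		 * OP2_BPr instruction with RS1 >= 16 in an ILP32 process,
		 * which is what makes the ASSERT below safe.)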
		 */
		ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);

		value = (int64_t)fasttrap_getreg(rp, reg);

		switch (tp->ftt_code) {
		case 0x1:	/* BRZ */
			taken = (value == 0);	break;
		case 0x2:	/* BRLEZ */
			taken = (value <= 0);	break;
		case 0x3:	/* BRLZ */
			taken = (value < 0);	break;
		case 0x5:	/* BRNZ */
			taken = (value != 0);	break;
		case 0x6:	/* BRGZ */
			taken = (value > 0);	break;
		case 0x7:	/* BRGEZ */
			taken = (value >= 0);	break;
		default:
		case 0x0:
		case 0x4:
			panic("fasttrap: mishandled a branch");
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_ALWAYS:
		/*
		 * BAs, BA,As...
		 */

		if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Annulled branch always instructions never execute
			 * the instruction in the delay slot.
			 */
			pc = tp->ftt_dest;
			npc = tp->ftt_dest + 4;
		} else {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		}
		break;

	case FASTTRAP_T_RDPC:
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CALL:
		/*
		 * It's a call _and_ link, remember...
		 */
		rp->r_o7 = rp->r_pc;
		pc = rp->r_npc;
		npc = tp->ftt_dest;
		break;

	case FASTTRAP_T_JMPL:
		pc = rp->r_npc;

		if (I(tp->ftt_instr)) {
			uint_t rs1 = RS1(tp->ftt_instr);
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			npc = fasttrap_getreg(rp, rs1) + imm;
		} else {
			uint_t rs1 = RS1(tp->ftt_instr);
			uint_t rs2 = RS2(tp->ftt_instr);

			npc = fasttrap_getreg(rp, rs1) +
			    fasttrap_getreg(rp, rs2);
		}

		/*
		 * Do the link part of the jump-and-link instruction.
		 */
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);

		break;

	case FASTTRAP_T_COMMON:
	{
		curthread->t_dtrace_scrpc = rp->r_g7;
		curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;

		/*
		 * Copy the instruction to a reserved location in the
		 * user-land thread structure, then set the PC to that
		 * location and leave the NPC alone. We take pains to ensure
		 * consistency in the instruction stream (See SPARC
		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
		 * instruction into the user's address space without
		 * bypassing the I$. There's no AS_USER version of this ASI
		 * (as exists for other ASIs) so we use the lofault
		 * mechanism to catch faults.
		 */
		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
			/*
			 * If the copyout fails, then the process's state
			 * is not consistent (the effects of the traced
			 * instruction will never be seen). This process
			 * cannot be allowed to continue execution.
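			 * (fasttrap_sigtrap() arranges for the thread to
			 * receive a SIGTRAP at the given pc; since the
			 * process can't be allowed to continue, this is how
			 * we kill it.)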
			 */
			fasttrap_sigtrap(curproc, curthread, pc);
			return (0);
		}

		curthread->t_dtrace_pc = pc;
		curthread->t_dtrace_npc = npc;
		curthread->t_dtrace_on = 1;

		pc = curthread->t_dtrace_scrpc;

		if (tp->ftt_retids != NULL) {
			curthread->t_dtrace_step = 1;
			curthread->t_dtrace_ret = 1;
			npc = curthread->t_dtrace_astpc;
		}
		break;
	}

	default:
		panic("fasttrap: mishandled an instruction");
	}

	/*
	 * This bit me in the ass a couple of times, so let's toss this
	 * in as a cursory sanity check.
	 */
	ASSERT(pc != rp->r_g7 + 4);
	ASSERT(pc != rp->r_g7 + 8);

done:
	/*
	 * If there were no return probes when we first found the tracepoint,
	 * we should feel no obligation to honor any return probes that were
	 * subsequently enabled -- they'll just have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until we return to the kernel.
		 */
		if (tp->ftt_type != FASTTRAP_T_COMMON) {
			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
		} else {
			ASSERT(curthread->t_dtrace_ret != 0);
			ASSERT(curthread->t_dtrace_pc == orig_pc);
			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
			ASSERT(npc == curthread->t_dtrace_astpc);
		}
	}

	ASSERT(pc != 0);
	rp->r_pc = pc;
	rp->r_npc = npc;

	return (0);
}

int
fasttrap_return_probe(struct regs *rp)
{
	proc_t *p = ttoproc(curthread);
	pid_t pid;
	uintptr_t pc = curthread->t_dtrace_pc;
	uintptr_t npc = curthread->t_dtrace_npc;

	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	/*
	 * We set the %pc and %npc to their values when the traced
	 * instruction was initially executed so that it appears to
	 * dtrace_probe() that we're on the original instruction, and so that
	 * the user can't easily detect our complex web of lies.
	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
	 * after we return.
	 */
	rp->r_pc = pc;
	rp->r_npc = npc;

	pid = p->p_pid;
	fasttrap_return_common(rp, pc, pid, 0);

	return (0);
}

int
fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr = FASTTRAP_INSTR;

	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr;

	/*
	 * Distinguish between read or write failures and a changed
	 * instruction.
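	 * If we can't read the instruction back, or if what's there is
	 * neither our trap instruction nor a debugger's breakpoint, the
	 * instruction has changed out from under us and there is nothing
	 * to restore, so we return success; only a failed write of the
	 * original instruction is reported as an error.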
	 */
	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
		return (0);
	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
		return (0);
	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
    fasttrap_probe_type_t type)
{
	uint32_t instr;
	int32_t disp;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set.
	 */
	if (uread(p, &instr, 4, pc) != 0)
		return (-1);

	/*
	 * Decode the instruction to fill in the probe flags. We can have
	 * the process execute most instructions on its own using a pc/npc
	 * trick, but pc-relative control transfers present a problem since
	 * we're relocating the instruction. We emulate these instructions
	 * in the kernel. We assume a default type and overwrite that as
	 * needed.
	 *
	 * pc-relative instructions must be emulated for correctness;
	 * other instructions (which represent a large set of commonly traced
	 * instructions) are emulated or otherwise optimized for performance.
	 */
	tp->ftt_type = FASTTRAP_T_COMMON;
	if (OP(instr) == 1) {
		/*
		 * Call instructions.
		 */
		tp->ftt_type = FASTTRAP_T_CALL;
		disp = DISP30(instr) << 2;
		tp->ftt_dest = pc + (intptr_t)disp;

	} else if (OP(instr) == 0) {
		/*
		 * Branch instructions.
		 *
		 * Unconditional branches need careful attention when they're
		 * annulled: annulled unconditional branches never execute
		 * the instruction in the delay slot.
		 */
		switch (OP2(instr)) {
		case OP2_ILLTRAP:
		case 0x7:
			/*
			 * The compiler may place an illtrap after a call to
			 * a function that returns a structure. In the case of
			 * a returned structure, the compiler places an illtrap
			 * whose const22 field is the size of the returned
			 * structure immediately following the delay slot of
			 * the call. To stay out of the way, we refuse to
			 * place tracepoints on top of illtrap instructions.
			 *
			 * This is one of the dumbest architectural decisions
			 * I've ever had to work around.
			 *
			 * We also identify the only illegal op2 value (See
			 * SPARC Architecture Manual Version 9, E.2 table 31).
			 */
			return (-1);

		case OP2_BPcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				/*
				 * Check for an illegal instruction.
				 */
				if (CC(instr) & 1)
					return (-1);
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_Bicc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_BPr:
			/*
			 * Check for an illegal instruction.
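			 * (BPr rcond values 0 and 4 are reserved in the
			 * SPARC V9 encoding, which is why a clear low two
			 * bits marks the instruction as illegal; these are
			 * the same two values that panic in the
			 * FASTTRAP_T_REG emulation above.)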
			 */
			if ((RCOND(instr) & 3) == 0)
				return (-1);

			/*
			 * It's a violation of the v8plus ABI to use a
			 * register-predicated branch in a 32-bit app if
			 * the register used is an %l or an %i (%gs and %os
			 * are legit because they're not saved to the stack
			 * in 32-bit words when we take a trap).
			 */
			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
				return (-1);

			tp->ftt_type = FASTTRAP_T_REG;
			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;
			disp = DISP16(instr);
			disp <<= 16;
			disp >>= 14;
			tp->ftt_dest = pc + (intptr_t)disp;
			tp->ftt_code = RCOND(instr);
			break;

		case OP2_SETHI:
			tp->ftt_type = FASTTRAP_T_SETHI;
			break;

		case OP2_FBPfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_FBfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;
		}

	} else if (OP(instr) == 2) {
		switch (OP3(instr)) {
		case OP3_RETURN:
			tp->ftt_type = FASTTRAP_T_RETURN;
			break;

		case OP3_JMPL:
			tp->ftt_type = FASTTRAP_T_JMPL;
			break;

		case OP3_RD:
			if (RS1(instr) == 5)
				tp->ftt_type = FASTTRAP_T_RDPC;
			break;

		case OP3_SAVE:
			/*
			 * We optimize for save instructions at function
			 * entry; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_SAVE) for details.
			 */
			if (fasttrap_optimize_save != 0 &&
			    type == DTFTP_ENTRY &&
			    I(instr) == 1 && RD(instr) == R_SP)
				tp->ftt_type = FASTTRAP_T_SAVE;
			break;

		case OP3_RESTORE:
			/*
			 * We optimize restore instructions at function
			 * return; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_RESTORE) for details.
			 *
			 * rd must be an %o or %g register.
			 */
			if ((RD(instr) & 0x10) == 0)
				tp->ftt_type = FASTTRAP_T_RESTORE;
			break;

		case OP3_OR:
			/*
			 * A large proportion of instructions in the delay
			 * slot of retl instructions are or's so we emulate
			 * these downstairs as an optimization.
			 */
			tp->ftt_type = FASTTRAP_T_OR;
			break;

		case OP3_TCC:
			/*
			 * Breakpoint instructions are effectively position-
			 * dependent since the debugger uses the %pc value
			 * to look up which breakpoint was executed. As a
			 * result, we can't actually instrument breakpoints.
			 */
			if (SW_TRAP(instr) == ST_BREAKPOINT)
				return (-1);
			break;

		case 0x19:
		case 0x1d:
		case 0x29:
		case 0x33:
		case 0x3f:
			/*
			 * Identify illegal instructions (See SPARC
			 * Architecture Manual Version 9, E.2 table 32).
			 */
			return (-1);
		}
	} else if (OP(instr) == 3) {
		uint32_t op3 = OP3(instr);

		/*
		 * Identify illegal instructions (See SPARC Architecture
		 * Manual Version 9, E.2 table 33).
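		 * (The 0x28 mask below selects the portion of the
		 * load/store op3 space in which only the prefetch and
		 * compare-and-swap forms -- OP3_PREFETCH, OP3_CASA,
		 * OP3_PREFETCHA, and OP3_CASXA -- are legal; everything
		 * else in that range is reserved.)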
		 */
		if ((op3 & 0x28) == 0x28) {
			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
				return (-1);
		} else {
			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
				return (-1);
		}
	}

	tp->ftt_instr = instr;

	/*
	 * We don't know how this tracepoint is going to be used, but in case
	 * it's used as part of a function return probe, we need to indicate
	 * whether it's always a return site or only potentially a return
	 * site. If it's part of a return probe, it's always going to be a
	 * return from that function if it's a restore instruction or if
	 * the previous instruction was a return. If we could reliably
	 * distinguish jump tables from return sites, this wouldn't be
	 * necessary.
	 */
	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;

	return (0);
}

/*ARGSUSED*/
uint64_t
fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
}

/*ARGSUSED*/
uint64_t
fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
}

static uint64_t fasttrap_getreg_fast_cnt;
static uint64_t fasttrap_getreg_mpcb_cnt;
static uint64_t fasttrap_getreg_slow_cnt;

static ulong_t
fasttrap_getreg(struct regs *rp, uint_t reg)
{
	ulong_t value;
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern ulong_t dtrace_getreg_win(uint_t, uint_t);

	/*
	 * We have the %os and %gs in our struct regs, but if we need to
	 * snag a %l or %i we need to go scrounging around in the process's
	 * address space.
	 */
	if (reg == 0)
		return (0);

	if (reg < 16)
		return ((&rp->r_g1)[reg - 1]);

	/*
	 * Before we look at the user's stack, we'll check the register
	 * windows to see if the information we want is in there.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		value = dtrace_getreg_win(reg, 1);
		dtrace_interrupt_enable(cookie);

		atomic_inc_64(&fasttrap_getreg_fast_cnt);

		return (value);
	}
	dtrace_interrupt_enable(cookie);

	/*
	 * First check the machpcb structure to see if we've already read
	 * in the register window we're looking for; if we haven't (and
	 * we probably haven't), try to copy in the value of the register.
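	 * (The struct regs we were handed lives at offset REGOFF within
	 * the thread's machpcb, so subtracting REGOFF recovers the
	 * machpcb -- and with it the buffered register windows -- from
	 * rp.)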
	 */
	/* LINTED - alignment */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
				return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
			goto err;
	} else {
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		uint32_t *v32 = (uint32_t *)&value;

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
				return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
			goto err;

		v32[0] = 0;
	}

	atomic_inc_64(&fasttrap_getreg_slow_cnt);
	return (value);

err:
	/*
	 * If the copy in failed, the process will be in an irrecoverable
	 * state, and we have no choice but to kill it.
	 */
	psignal(ttoproc(curthread), SIGILL);
	return (0);
}

static uint64_t fasttrap_putreg_fast_cnt;
static uint64_t fasttrap_putreg_mpcb_cnt;
static uint64_t fasttrap_putreg_slow_cnt;

static void
fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
{
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern void dtrace_putreg_win(uint_t, ulong_t);

	if (reg == 0)
		return;

	if (reg < 16) {
		(&rp->r_g1)[reg - 1] = value;
		return;
	}

	/*
	 * If the user process is still using some register windows, we
	 * can just place the value in the correct window.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		dtrace_putreg_win(reg, value);
		dtrace_interrupt_enable(cookie);
		atomic_inc_64(&fasttrap_putreg_fast_cnt);
		return;
	}
	dtrace_interrupt_enable(cookie);

	/*
	 * First see if there's a copy of the register window in the
	 * machpcb structure that we can modify; if there isn't, try to
	 * copy out the value. If that fails, we try to create a new
	 * register window in the machpcb structure. While this isn't
	 * _precisely_ the intended use of the machpcb structure, it
	 * can't cause any problems since we know at this point in the
	 * code that all of the user's data have been flushed out of the
	 * register file (since %otherwin is 0).
	 */
	/* LINTED - alignment */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		/* LINTED - alignment */
		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;

		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = value;
				atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
				return;
			} while (i > 0);
		}

		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
			return;
		}
	} else {
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		/* LINTED - alignment */
		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
		uint32_t v32 = (uint32_t)value;

		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = v32;
				atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
				return;
			} while (i > 0);
		}

		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
			return;
		}
	}

	atomic_inc_64(&fasttrap_putreg_slow_cnt);
	return;

err:
	/*
	 * If we couldn't record this register's value, the process is in an
	 * irrecoverable state and we have no choice but to euthanize it.
	 */
	psignal(ttoproc(curthread), SIGILL);
}