/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/fasttrap_isa.h>
#include <sys/fasttrap_impl.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <sys/cmn_err.h>
#include <sys/frame.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <sys/trap.h>

#include <v9/sys/machpcb.h>
#include <v9/sys/privregs.h>

/*
 * Lossless User-Land Tracing on SPARC
 * -----------------------------------
 *
 * The Basic Idea
 *
 * The most important design constraint is, of course, correct execution of
 * the user thread above all else. The next most important goal is rapid
 * execution. We combine execution of instructions in user-land with
 * emulation of certain instructions in the kernel to aim for complete
 * correctness and maximal performance.
 *
 * We take advantage of the split PC/NPC architecture to speed up logical
 * single-stepping; when we copy an instruction out to the scratch space in
 * the ulwp_t structure (held in the %g7 register on SPARC), we can
 * effectively single step by setting the PC to our scratch space and leaving
 * the NPC alone. This executes the replaced instruction and then continues
 * on without having to reenter the kernel as with single-stepping. The
 * obvious caveat is for instructions whose execution is PC dependent --
 * branches, call and link instructions (call and jmpl), and the rdpc
 * instruction. These instructions cannot be executed in the manner described
 * so they must be emulated in the kernel.
 *
 * Emulation for this small set of instructions is fairly simple; the most
 * difficult part is emulating branch conditions.
 *
 *
 * A Cache Heavy Portfolio
 *
 * It's important to note at this time that copying an instruction out to the
 * ulwp_t scratch space in user-land is rather complicated. SPARC has
 * separate data and instruction caches so any writes to the D$ (using a
 * store instruction for example) aren't necessarily reflected in the I$.
 * The flush instruction can be used to synchronize the two and must be used
 * for any self-modifying code, but the flush instruction only applies to the
 * primary address space (the absence of a flusha analogue to the flush
 * instruction that accepts an ASI argument is an obvious omission from SPARC
 * v9 where the notion of the alternate address space was introduced on
 * SPARC).
 * To correctly copy out the instruction we must use a block store that
 * doesn't allocate in the D$ and ensures synchronization with the I$; see
 * dtrace_blksuword32() for the implementation (this function uses
 * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
 * described). Refer to the UltraSPARC I/II manual for details on the
 * ASI_BLK_COMMIT_S ASI.
 *
 *
 * Return Subtleties
 *
 * When we're firing a return probe we need to expose the value returned by
 * the function being traced. Since the function can set the return value
 * in its last instruction, we need to fire the return probe only _after_
 * the effects of the instruction are apparent. For instructions that we
 * emulate, we can call dtrace_probe() after we've performed the emulation;
 * for instructions that we execute after we return to user-land, we set
 * %pc to the instruction we copied out (as described above) and set %npc
 * to a trap instruction stashed in the ulwp_t structure. After the traced
 * instruction is executed, the trap instruction returns control to the
 * kernel where we can fire the return probe.
 *
 * This need for a second trap in cases where we execute the traced
 * instruction makes it all the more important to emulate the most common
 * instructions to avoid the second trip in and out of the kernel.
 *
 *
 * Making it Fast
 *
 * Since copying out an instruction is neither simple nor inexpensive for the
 * CPU, we should attempt to avoid doing it in as many cases as possible.
 * Since function entry and return are usually the most interesting probe
 * sites, we attempt to tune the performance of the fasttrap provider around
 * instructions typically in those places.
 *
 * Looking at a bunch of functions in libraries and executables reveals that
 * most functions begin with either a save or a sethi (to set up a larger
 * argument to the save) and end with a restore or an or (in the case of leaf
 * functions). To try to improve performance, we emulate all of these
 * instructions in the kernel.
 *
 * The save and restore instructions are a little tricky since they perform
 * register window manipulation. Rather than trying to tinker with the
 * register windows from the kernel, we emulate the implicit add that takes
 * place as part of those instructions and set the %pc to point to a simple
 * save or restore we've hidden in the ulwp_t structure. If we're in a return
 * probe and want to make it seem as though the tracepoint has been completely
 * executed, we need to remember that we've pulled this trick with restore and
 * pull registers from the previous window (the one that we'll switch to once
 * the simple restore instruction is executed) rather than the current one.
 * This is why, in the case of emulating a restore, we set the DTrace CPU flag
 * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
 * (see fasttrap_return_common()).
 */
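
/*
 * To make the scratch-space trick concrete, here is a hypothetical timeline
 * for a probe on a (non-PC-dependent) instruction; the addresses below are
 * made up for illustration only:
 *
 *	1. Trap taken at the tracepoint:   %pc = 0x10f04, %npc = 0x10f08
 *	2. The original instruction is copied to the ulwp_t scratch space,
 *	   say at %g7 + 0x40.
 *	3. We return to user-land with:    %pc = %g7 + 0x40, %npc = 0x10f08
 *	4. The copied instruction executes in place and control falls
 *	   through to %npc -- no second kernel entry is needed.
 *
 * If a return probe is active, step 3 instead sets %npc to the trap
 * instruction stashed in the scratch space so that we reenter the kernel
 * once the effects of the traced instruction are visible.
 */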

#define	OP(x)		((x) >> 30)
#define	OP2(x)		(((x) >> 22) & 0x07)
#define	OP3(x)		(((x) >> 19) & 0x3f)
#define	RCOND(x)	(((x) >> 25) & 0x07)
#define	COND(x)		(((x) >> 25) & 0x0f)
#define	A(x)		(((x) >> 29) & 0x01)
#define	I(x)		(((x) >> 13) & 0x01)
#define	RD(x)		(((x) >> 25) & 0x1f)
#define	RS1(x)		(((x) >> 14) & 0x1f)
#define	RS2(x)		(((x) >> 0) & 0x1f)
#define	CC(x)		(((x) >> 20) & 0x03)
#define	DISP16(x)	((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
#define	DISP22(x)	((x) & 0x3fffff)
#define	DISP19(x)	((x) & 0x7ffff)
#define	DISP30(x)	((x) & 0x3fffffff)
#define	SW_TRAP(x)	((x) & 0x7f)

#define	OP3_OR		0x02
#define	OP3_RD		0x28
#define	OP3_JMPL	0x38
#define	OP3_RETURN	0x39
#define	OP3_TCC		0x3a
#define	OP3_SAVE	0x3c
#define	OP3_RESTORE	0x3d

#define	OP3_PREFETCH	0x2d
#define	OP3_CASA	0x3c
#define	OP3_PREFETCHA	0x3d
#define	OP3_CASXA	0x3e

#define	OP2_ILLTRAP	0x0
#define	OP2_BPcc	0x1
#define	OP2_Bicc	0x2
#define	OP2_BPr		0x3
#define	OP2_SETHI	0x4
#define	OP2_FBPfcc	0x5
#define	OP2_FBfcc	0x6

#define	R_G0		0
#define	R_O0		8
#define	R_SP		14
#define	R_I0		24
#define	R_I1		25
#define	R_I2		26
#define	R_I3		27
#define	R_I4		28

/*
 * Check the comment in fasttrap.h when changing these offsets or adding
 * new instructions.
 */
#define	FASTTRAP_OFF_SAVE	64
#define	FASTTRAP_OFF_RESTORE	68
#define	FASTTRAP_OFF_FTRET	72
#define	FASTTRAP_OFF_RETURN	76

#define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */
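
/*
 * As a worked example of the field macros above (illustrative only, not
 * used by the code): the common function prologue
 *
 *	save	%sp, -0x60, %sp
 *
 * encodes to 0x9de3bfa0. For instr = 0x9de3bfa0 we get OP(instr) = 2,
 * OP3(instr) = 0x3c (OP3_SAVE), RD(instr) = RS1(instr) = 14 (R_SP), and
 * I(instr) = 1, so the low 13 bits hold a signed immediate. The emulation
 * code below sign-extends it with a shift pair:
 * (int32_t)(instr << 19) >> 19 = -0x60.
 */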

/*
 * Tunable to let users turn off the fancy save instruction optimization.
 * If a program is non-ABI compliant, there's a possibility that the save
 * instruction optimization could cause an error.
 */
int fasttrap_optimize_save = 1;

static uint64_t
fasttrap_anarg(struct regs *rp, int argno)
{
	uint64_t value;

	if (argno < 6)
		return ((&rp->r_o0)[argno]);

	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fulword(&fr->fr_argd[argno]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
		    CPU_DTRACE_BADALIGN);
	} else {
		struct frame32 *fr = (struct frame32 *)rp->r_sp;

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fuword32(&fr->fr_argd[argno]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
		    CPU_DTRACE_BADALIGN);
	}

	return (value);
}

static ulong_t fasttrap_getreg(struct regs *, uint_t);
static void fasttrap_putreg(struct regs *, uint_t, ulong_t);

static void
fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
    uint_t fake_restore, int argc, uintptr_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);
	int inc = (fake_restore ? 16 : 0);

	/*
	 * The only way we'll hit the fake_restore case is if a USDT probe is
	 * invoked as a tail-call. While it wouldn't be incorrect, we can
	 * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
	 * directly since a tail-call can't be made if the invoked function
	 * would use the argument dump space (i.e. if there were more than
	 * 6 arguments). We take this shortcut because unconditionally rooting
	 * around for R_FP (R_SP + 16) would be unnecessarily painful.
	 */

	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		uintptr_t v;

		for (i = 0; i < cap; i++) {
			x = probe->ftp_argmap[i];

			if (x < 6)
				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
			else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
				argv[i] = 0;
			else
				argv[i] = v;
		}

	} else {
		struct frame32 *fr = (struct frame32 *)rp->r_sp;
		uint32_t v;

		for (i = 0; i < cap; i++) {
			x = probe->ftp_argmap[i];

			if (x < 6)
				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
			else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
				argv[i] = 0;
			else
				argv[i] = v;
		}
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}
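
/*
 * For example (a hypothetical mapping): with ftp_argmap = { 2, 0 }, probe
 * arg0 comes from %o2 and arg1 from %o0. When fake_restore is set, the
 * +16 offset reads %i2 and %i0 instead -- the values that will land in
 * %o2 and %o0 once the emulated restore completes.
 */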

static void
fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
    uint_t fake_restore)
{
	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
	kmutex_t *pid_mtx;
	dtrace_icookie_t cookie;

	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    !tp->ftt_proc->ftpc_defunct)
			break;
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
	 */
	if (tp == NULL || tp->ftt_retids == NULL) {
		mutex_exit(pid_mtx);
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;

		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
			if (probe->ftp_argmap != NULL && fake_restore) {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp, fake_restore,
				    sizeof (t) / sizeof (t[0]), t);

				cookie = dtrace_interrupt_disable();
				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
				dtrace_interrupt_enable(cookie);

			} else if (probe->ftp_argmap != NULL) {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp, fake_restore,
				    sizeof (t) / sizeof (t[0]), t);

				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);

			} else if (fake_restore) {
				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);

				cookie = dtrace_interrupt_disable();
				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
				dtrace_probe(probe->ftp_id, arg0, arg1,
				    arg2, arg3, arg4);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
				dtrace_interrupt_enable(cookie);

			} else {
				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
				    rp->r_o2, rp->r_o3, rp->r_o4);
			}

			continue;
		}

		/*
		 * If this is only a possible return point, we must
		 * be looking at a potential tail call in leaf context.
		 * If the %npc is still within this function, then we
		 * must have misidentified a jmpl as a tail-call when it
		 * is, in fact, part of a jump table. It would be nice to
		 * remove this tracepoint, but this is neither the time
		 * nor the place.
		 */
		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * It's possible for a function to branch to the delay slot
		 * of an instruction that we've identified as a return site.
		 * We can detect this spurious return probe activation by
		 * observing that in this case %npc will be %pc + 4 and %npc
		 * will be inside the current function (unless the user is
		 * doing _crazy_ instruction picking in which case there's
		 * very little we can do). The second check is important
		 * in case the last instructions of a function make a
		 * tail-call to the function located immediately after it.
		 */
		if (rp->r_npc == rp->r_pc + 4 &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;
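
		/*
		 * Illustration of the check above (hypothetical addresses):
		 * a genuine return leaves %npc pointing outside the traced
		 * function, back at the caller, so for a function at faddr
		 * with size fsize the test fails and the probe fires. If
		 * control instead branched into the delay slot and is
		 * simply falling through, %npc == %pc + 4 and
		 * %npc - faddr < fsize, so we skip the activation.
		 */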

		/*
		 * The first argument is the offset of the return tracepoint
		 * in the function; the remaining arguments are the return
		 * values.
		 *
		 * If fake_restore is set, we need to pull the return values
		 * out of the %i's rather than the %o's -- a little trickier.
		 */
		if (!fake_restore) {
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
		} else {
			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);

			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    arg0, arg1, arg2, arg3);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
			dtrace_interrupt_enable(cookie);
		}
	}

	mutex_exit(pid_mtx);
}

int
fasttrap_pid_probe(struct regs *rp)
{
	proc_t *p = curproc;
	fasttrap_tracepoint_t *tp, tp_local;
	fasttrap_id_t *id;
	pid_t pid;
	uintptr_t pc = rp->r_pc;
	uintptr_t npc = rp->r_npc;
	uintptr_t orig_pc = pc;
	fasttrap_bucket_t *bucket;
	kmutex_t *pid_mtx;
	uint_t fake_restore = 0, is_enabled = 0;
	dtrace_icookie_t cookie;

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (curthread->t_dtrace_step) {
		ASSERT(curthread->t_dtrace_on);
		fasttrap_sigtrap(p, curthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	curthread->t_dtrace_ft = 0;
	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	pid = p->p_pid;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Lookup the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    !tp->ftt_proc->ftpc_defunct)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
		mutex_exit(pid_mtx);
		return (-1);
	}

	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;
		int isentry = (id->fti_ptype == DTFTP_ENTRY);

		if (id->fti_ptype == DTFTP_IS_ENABLED) {
			is_enabled = 1;
			continue;
		}

		/*
		 * We note that this was an entry probe to help ustack() find
		 * the first caller.
		 */
		if (isentry) {
			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
		}
		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
		    rp->r_o3, rp->r_o4);
		if (isentry) {
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
			dtrace_interrupt_enable(cookie);
		}
	}

	/*
	 * We're about to do a bunch of work so we cache a local copy of
	 * the tracepoint to emulate the instruction, and then find the
	 * tracepoint again later if we need to light up any return probes.
	 */
	tp_local = *tp;
	mutex_exit(pid_mtx);
	tp = &tp_local;

	/*
	 * If there's an is-enabled probe connected to this tracepoint it
	 * means that there was a 'mov %g0, %o0' instruction that was placed
	 * there by DTrace when the binary was linked. As this probe is, in
	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
	 * bypass all the instruction emulation logic since we know the
	 * inevitable result. It's possible that a user could construct a
	 * scenario where the 'is-enabled' probe was on some other
	 * instruction, but that would be a rather exotic way to shoot oneself
	 * in the foot.
	 */
	if (is_enabled) {
		rp->r_o0 = 1;
		pc = rp->r_npc;
		npc = pc + 4;
		goto done;
	}

	/*
	 * We emulate certain types of instructions to ensure correctness
	 * (in the case of position dependent instructions) or optimize
	 * common cases. The rest we have the thread execute back in
	 * user-land.
	 */
	switch (tp->ftt_type) {
	case FASTTRAP_T_SAVE:
	{
		int32_t imm;

		/*
		 * This is an optimization to let us handle function entry
		 * probes more efficiently. Many functions begin with a save
		 * instruction that follows the pattern:
		 *	save	%sp, <imm>, %sp
		 *
		 * Meanwhile, we've stashed the instruction:
		 *	save	%g1, %g0, %sp
		 *
		 * off of %g7, so all we have to do is stick the right value
		 * into %g1 and reset %pc to point to the instruction we've
		 * cleverly hidden (%npc should not be touched).
		 */

		imm = tp->ftt_instr << 19;
		imm >>= 19;
		rp->r_g1 = rp->r_sp + imm;
		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
		break;
	}
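
	/*
	 * Continuing the worked example from the top of this file
	 * (hypothetical values): for the traced instruction
	 * save %sp, -0x60, %sp (0x9de3bfa0), imm ends up as -0x60, so we
	 * set %g1 = %sp - 0x60. The hidden "save %g1, %g0, %sp" at
	 * %g7 + FASTTRAP_OFF_SAVE then shifts the register window and
	 * installs %g1 as the new %sp, exactly as the original save
	 * would have.
	 */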

	case FASTTRAP_T_RESTORE:
	{
		ulong_t value;
		uint_t rd;

		/*
		 * This is an optimization to let us handle function
		 * return probes more efficiently. Most non-leaf functions
		 * end with the sequence:
		 *	ret
		 *	restore	<reg>, <reg_or_imm>, %oX
		 *
		 * We've stashed the instruction:
		 *	restore	%g0, %g0, %g0
		 *
		 * off of %g7 so we just need to place the correct value
		 * in the right %i register (since after our fake-o
		 * restore, the %i's will become the %o's) and set the %pc
		 * to point to our hidden restore. We also set fake_restore to
		 * let fasttrap_return_common() know that it will find the
		 * return values in the %i's rather than the %o's.
		 */

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		/*
		 * Convert %o's to %i's; leave %g's as they are.
		 */
		rd = RD(tp->ftt_instr);
		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);

		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_RETURN:
	{
		uintptr_t target;

		/*
		 * A return instruction is like a jmpl (without the link
		 * part) that executes an implicit restore. We've stashed
		 * the instruction:
		 *	return	%o0
		 *
		 * off of %g7 so we just need to place the target in %o0
		 * and set the %pc to point to the stashed return instruction.
		 * We use %o0 since that register disappears after the return
		 * executes, erasing any evidence of this tampering.
		 */
		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, R_O0, target);

		pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_OR:
	{
		ulong_t value;

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, RD(tp->ftt_instr), value);
		pc = rp->r_npc;
		npc = pc + 4;
		break;
	}

	case FASTTRAP_T_SETHI:
		if (RD(tp->ftt_instr) != R_G0) {
			uint32_t imm32 = tp->ftt_instr << 10;
			fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
		}
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CCR:
	{
		uint_t c, v, z, n, taken;
		uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;

		if (tp->ftt_cc != 0)
			ccr >>= 4;

		c = (ccr >> 0) & 1;
		v = (ccr >> 1) & 1;
		z = (ccr >> 2) & 1;
		n = (ccr >> 3) & 1;

		switch (tp->ftt_code) {
		case 0x0:	/* BN */
			taken = 0; break;
		case 0x1:	/* BE */
			taken = z; break;
		case 0x2:	/* BLE */
			taken = z | (n ^ v); break;
		case 0x3:	/* BL */
			taken = n ^ v; break;
		case 0x4:	/* BLEU */
			taken = c | z; break;
		case 0x5:	/* BCS (BLU) */
			taken = c; break;
		case 0x6:	/* BNEG */
			taken = n; break;
		case 0x7:	/* BVS */
			taken = v; break;
		case 0x8:	/* BA */
			/*
			 * We handle the BA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = 1; break;
		case 0x9:	/* BNE */
			taken = ~z; break;
		case 0xa:	/* BG */
			taken = ~(z | (n ^ v)); break;
		case 0xb:	/* BGE */
			taken = ~(n ^ v); break;
		case 0xc:	/* BGU */
			taken = ~(c | z); break;
		case 0xd:	/* BCC (BGEU) */
			taken = ~c; break;
		case 0xe:	/* BPOS */
			taken = ~n; break;
		case 0xf:	/* BVC */
			taken = ~v; break;
		}

		if (taken & 1) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}
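
	/*
	 * Example of the flag math above (illustrative): %icc occupies the
	 * low four bits of the CCR as N|Z|V|C, so after a subcc that yields
	 * a negative result with no overflow we'd have n = 1, v = 0. For a
	 * BL tracepoint (ftt_code 0x3), taken = n ^ v = 1 and we branch to
	 * tp->ftt_dest; for BGE (0xb), taken = ~(n ^ v) has its low bit
	 * clear and we fall through to the delay slot.
	 */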

	case FASTTRAP_T_FCC:
	{
		uint_t fcc;
		uint_t taken;
		uint64_t fsr;

		dtrace_getfsr(&fsr);

		if (tp->ftt_cc == 0) {
			fcc = (fsr >> 10) & 0x3;
		} else {
			uint_t shift;
			ASSERT(tp->ftt_cc <= 3);
			shift = 30 + tp->ftt_cc * 2;
			fcc = (fsr >> shift) & 0x3;
		}

		switch (tp->ftt_code) {
		case 0x0:	/* FBN */
			taken = (1 << fcc) & (0|0|0|0); break;
		case 0x1:	/* FBNE */
			taken = (1 << fcc) & (8|4|2|0); break;
		case 0x2:	/* FBLG */
			taken = (1 << fcc) & (0|4|2|0); break;
		case 0x3:	/* FBUL */
			taken = (1 << fcc) & (8|0|2|0); break;
		case 0x4:	/* FBL */
			taken = (1 << fcc) & (0|0|2|0); break;
		case 0x5:	/* FBUG */
			taken = (1 << fcc) & (8|4|0|0); break;
		case 0x6:	/* FBG */
			taken = (1 << fcc) & (0|4|0|0); break;
		case 0x7:	/* FBU */
			taken = (1 << fcc) & (8|0|0|0); break;
		case 0x8:	/* FBA */
			/*
			 * We handle the FBA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = (1 << fcc) & (8|4|2|1); break;
		case 0x9:	/* FBE */
			taken = (1 << fcc) & (0|0|0|1); break;
		case 0xa:	/* FBUE */
			taken = (1 << fcc) & (8|0|0|1); break;
		case 0xb:	/* FBGE */
			taken = (1 << fcc) & (0|4|0|1); break;
		case 0xc:	/* FBUGE */
			taken = (1 << fcc) & (8|4|0|1); break;
		case 0xd:	/* FBLE */
			taken = (1 << fcc) & (0|0|2|1); break;
		case 0xe:	/* FBULE */
			taken = (1 << fcc) & (8|0|2|1); break;
		case 0xf:	/* FBO */
			taken = (1 << fcc) & (0|4|2|1); break;
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}
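
	/*
	 * The bit masks above read as U|G|L|E: fcc encodes the result of the
	 * last comparison as 0 (equal), 1 (less), 2 (greater), or
	 * 3 (unordered), and (1 << fcc) selects the matching bit. For
	 * example, FBUL (0x3) uses the mask 8|0|2|0, so it's taken when fcc
	 * is 3 (unordered, bit 8) or 1 (less, bit 2), matching the
	 * architectural definition of "unordered or less".
	 */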

	case FASTTRAP_T_REG:
	{
		int64_t value;
		uint_t taken;
		uint_t reg = RS1(tp->ftt_instr);

		/*
		 * An ILP32 process shouldn't be using a branch predicated on
		 * an %i or an %l since it would violate the ABI. It's a
		 * violation of the ABI because we can't ensure deterministic
		 * behavior. We should have identified this case when we
		 * enabled the probe.
		 */
		ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);

		value = (int64_t)fasttrap_getreg(rp, reg);

		switch (tp->ftt_code) {
		case 0x1:	/* BRZ */
			taken = (value == 0); break;
		case 0x2:	/* BRLEZ */
			taken = (value <= 0); break;
		case 0x3:	/* BRLZ */
			taken = (value < 0); break;
		case 0x5:	/* BRNZ */
			taken = (value != 0); break;
		case 0x6:	/* BRGZ */
			taken = (value > 0); break;
		case 0x7:	/* BRGEZ */
			taken = (value >= 0); break;
		default:
		case 0x0:
		case 0x4:
			panic("fasttrap: mishandled a branch");
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_ALWAYS:
		/*
		 * BAs, BA,As...
		 */

		if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Annulled branch always instructions never execute
			 * the instruction in the delay slot.
			 */
			pc = tp->ftt_dest;
			npc = tp->ftt_dest + 4;
		} else {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		}
		break;

	case FASTTRAP_T_RDPC:
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CALL:
		/*
		 * It's a call _and_ link, remember...
		 */
		rp->r_o7 = rp->r_pc;
		pc = rp->r_npc;
		npc = tp->ftt_dest;
		break;

	case FASTTRAP_T_JMPL:
		pc = rp->r_npc;

		if (I(tp->ftt_instr)) {
			uint_t rs1 = RS1(tp->ftt_instr);
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			npc = fasttrap_getreg(rp, rs1) + imm;
		} else {
			uint_t rs1 = RS1(tp->ftt_instr);
			uint_t rs2 = RS2(tp->ftt_instr);

			npc = fasttrap_getreg(rp, rs1) +
			    fasttrap_getreg(rp, rs2);
		}

		/*
		 * Do the link part of the jump-and-link instruction.
		 */
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);

		break;

	case FASTTRAP_T_COMMON:
	{
		curthread->t_dtrace_scrpc = rp->r_g7;
		curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;

		/*
		 * Copy the instruction to a reserved location in the
		 * user-land thread structure, then set the PC to that
		 * location and leave the NPC alone. We take pains to ensure
		 * consistency in the instruction stream (See SPARC
		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1
		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
		 * instruction into the user's address space without
		 * bypassing the I$. There's no AS_USER version of this ASI
		 * (as exists for other ASIs) so we use the lofault
		 * mechanism to catch faults.
		 */
		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
			/*
			 * If the copyout fails, then the process's state
			 * is not consistent (the effects of the traced
			 * instruction will never be seen). This process
			 * cannot be allowed to continue execution.
			 */
			fasttrap_sigtrap(curproc, curthread, pc);
			return (0);
		}

		curthread->t_dtrace_pc = pc;
		curthread->t_dtrace_npc = npc;
		curthread->t_dtrace_on = 1;

		pc = curthread->t_dtrace_scrpc;

		if (tp->ftt_retids != NULL) {
			curthread->t_dtrace_step = 1;
			curthread->t_dtrace_ret = 1;
			npc = curthread->t_dtrace_astpc;
		}
		break;
	}
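
	/*
	 * Note on the bookkeeping above (per the offsets defined at the top
	 * of this file): scrpc points at the copied instruction in the
	 * ulwp_t scratch space and astpc at the trap instruction stashed at
	 * %g7 + FASTTRAP_OFF_FTRET. With no return probes we run the copied
	 * instruction and continue at the original %npc; with return probes
	 * we instead set %npc = astpc so the stashed trap brings us back
	 * into the kernel, where fasttrap_return_probe() can fire.
	 */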

	default:
		panic("fasttrap: mishandled an instruction");
	}

	/*
	 * This bit me in the ass a couple of times, so let's toss this
	 * in as a cursory sanity check.
	 */
	ASSERT(pc != rp->r_g7 + 4);
	ASSERT(pc != rp->r_g7 + 8);

done:
	/*
	 * If there were no return probes when we first found the tracepoint,
	 * we should feel no obligation to honor any return probes that were
	 * subsequently enabled -- they'll just have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until we return to the kernel.
		 */
		if (tp->ftt_type != FASTTRAP_T_COMMON) {
			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
		} else {
			ASSERT(curthread->t_dtrace_ret != 0);
			ASSERT(curthread->t_dtrace_pc == orig_pc);
			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
			ASSERT(npc == curthread->t_dtrace_astpc);
		}
	}

	ASSERT(pc != 0);
	rp->r_pc = pc;
	rp->r_npc = npc;

	return (0);
}

int
fasttrap_return_probe(struct regs *rp)
{
	proc_t *p = ttoproc(curthread);
	pid_t pid;
	uintptr_t pc = curthread->t_dtrace_pc;
	uintptr_t npc = curthread->t_dtrace_npc;

	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	/*
	 * We set the %pc and %npc to their values when the traced
	 * instruction was initially executed so that it appears to
	 * dtrace_probe() that we're on the original instruction, and so that
	 * the user can't easily detect our complex web of lies.
	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
	 * after we return.
	 */
	rp->r_pc = pc;
	rp->r_npc = npc;

	pid = p->p_pid;
	fasttrap_return_common(rp, pc, pid, 0);

	return (0);
}

int
fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr = FASTTRAP_INSTR;

	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr;

	/*
	 * Distinguish between read or write failures and a changed
	 * instruction.
	 */
	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
		return (0);
	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
		return (0);
	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
    fasttrap_probe_type_t type)
{
	uint32_t instr;
	int32_t disp;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set.
	 */
	if (uread(p, &instr, 4, pc) != 0)
		return (-1);

	/*
	 * Decode the instruction to fill in the probe flags. We can have
	 * the process execute most instructions on its own using a pc/npc
	 * trick, but pc-relative control transfers present a problem since
	 * we're relocating the instruction. We emulate these instructions
	 * in the kernel. We assume a default type and overwrite that as
	 * needed.
	 *
	 * pc-relative instructions must be emulated for correctness;
	 * other instructions (which represent a large set of commonly traced
	 * instructions) are emulated or otherwise optimized for performance.
	 */
	tp->ftt_type = FASTTRAP_T_COMMON;
	if (OP(instr) == 1) {
		/*
		 * Call instructions.
		 */
		tp->ftt_type = FASTTRAP_T_CALL;
		disp = DISP30(instr) << 2;
		tp->ftt_dest = pc + (intptr_t)disp;

	} else if (OP(instr) == 0) {
		/*
		 * Branch instructions.
		 *
		 * Unconditional branches need careful attention when they're
		 * annulled: annulled unconditional branches never execute
		 * the instruction in the delay slot.
		 */
		switch (OP2(instr)) {
		case OP2_ILLTRAP:
		case 0x7:
			/*
			 * The compiler may place an illtrap after a call to
			 * a function that returns a structure. In the case of
			 * a returned structure, the compiler places an illtrap
			 * whose const22 field is the size of the returned
			 * structure immediately following the delay slot of
			 * the call. To stay out of the way, we refuse to
			 * place tracepoints on top of illtrap instructions.
			 *
			 * This is one of the dumbest architectural decisions
			 * I've ever had to work around.
			 *
			 * We also identify the only illegal op2 value (See
			 * SPARC Architecture Manual Version 9, E.2 table 31).
			 */
			return (-1);

		case OP2_BPcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				/*
				 * Check for an illegal instruction.
				 */
				if (CC(instr) & 1)
					return (-1);
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_Bicc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;
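
		/*
		 * A worked displacement example for the Bicc case above
		 * (illustrative only): "ba .-4" encodes to 0x10bfffff, so
		 * DISP22(instr) = 0x3fffff. The shift pair
		 * ((disp << 10) >> 8) both sign-extends the 22-bit field and
		 * multiplies it by 4 (instruction words to bytes), yielding
		 * -4 and thus ftt_dest = pc - 4.
		 */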

		case OP2_BPr:
			/*
			 * Check for an illegal instruction.
			 */
			if ((RCOND(instr) & 3) == 0)
				return (-1);

			/*
			 * It's a violation of the v8plus ABI to use a
			 * register-predicated branch in a 32-bit app if
			 * the register used is an %l or an %i (%gs and %os
			 * are legit because they're not saved to the stack
			 * in 32-bit words when we take a trap).
			 */
			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
				return (-1);

			tp->ftt_type = FASTTRAP_T_REG;
			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;
			disp = DISP16(instr);
			disp <<= 16;
			disp >>= 14;
			tp->ftt_dest = pc + (intptr_t)disp;
			tp->ftt_code = RCOND(instr);
			break;

		case OP2_SETHI:
			tp->ftt_type = FASTTRAP_T_SETHI;
			break;

		case OP2_FBPfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_FBfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;
		}

	} else if (OP(instr) == 2) {
		switch (OP3(instr)) {
		case OP3_RETURN:
			tp->ftt_type = FASTTRAP_T_RETURN;
			break;

		case OP3_JMPL:
			tp->ftt_type = FASTTRAP_T_JMPL;
			break;

		case OP3_RD:
			if (RS1(instr) == 5)
				tp->ftt_type = FASTTRAP_T_RDPC;
			break;

		case OP3_SAVE:
			/*
			 * We optimize for save instructions at function
			 * entry; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_SAVE) for details.
			 */
			if (fasttrap_optimize_save != 0 &&
			    type == DTFTP_ENTRY &&
			    I(instr) == 1 && RD(instr) == R_SP)
				tp->ftt_type = FASTTRAP_T_SAVE;
			break;

		case OP3_RESTORE:
			/*
			 * We optimize restore instructions at function
			 * return; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_RESTORE) for details.
			 *
			 * rd must be an %o or %g register.
			 */
			if ((RD(instr) & 0x10) == 0)
				tp->ftt_type = FASTTRAP_T_RESTORE;
			break;

		case OP3_OR:
			/*
			 * A large proportion of instructions in the delay
			 * slot of retl instructions are or's so we emulate
			 * these downstairs as an optimization.
			 */
			tp->ftt_type = FASTTRAP_T_OR;
			break;

		case OP3_TCC:
			/*
			 * Breakpoint instructions are effectively position-
			 * dependent since the debugger uses the %pc value
			 * to look up which breakpoint was executed. As a
			 * result, we can't actually instrument breakpoints.
			 */
			if (SW_TRAP(instr) == ST_BREAKPOINT)
				return (-1);
			break;

		case 0x19:
		case 0x1d:
		case 0x29:
		case 0x33:
		case 0x3f:
			/*
			 * Identify illegal instructions (See SPARC
			 * Architecture Manual Version 9, E.2 table 32).
			 */
			return (-1);
		}
	} else if (OP(instr) == 3) {
		uint32_t op3 = OP3(instr);

		/*
		 * Identify illegal instructions (See SPARC Architecture
		 * Manual Version 9, E.2 table 33).
		 */
		if ((op3 & 0x28) == 0x28) {
			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
				return (-1);
		} else {
			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
				return (-1);
		}
	}

	tp->ftt_instr = instr;

	/*
	 * We don't know how this tracepoint is going to be used, but in case
	 * it's used as part of a function return probe, we need to indicate
	 * whether it's always a return site or only potentially a return
	 * site. If it's part of a return probe, it's always going to be a
	 * return from that function if it's a restore instruction or if
	 * the previous instruction was a return. If we could reliably
	 * distinguish jump tables from return sites, this wouldn't be
	 * necessary.
	 */
	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;

	return (0);
}

/*ARGSUSED*/
uint64_t
fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
}

/*ARGSUSED*/
uint64_t
fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
}

static uint64_t fasttrap_getreg_fast_cnt;
static uint64_t fasttrap_getreg_mpcb_cnt;
static uint64_t fasttrap_getreg_slow_cnt;

static ulong_t
fasttrap_getreg(struct regs *rp, uint_t reg)
{
	ulong_t value;
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern ulong_t dtrace_getreg_win(uint_t, uint_t);

	/*
	 * We have the %os and %gs in our struct regs, but if we need to
	 * snag a %l or %i we need to go scrounging around in the process's
	 * address space.
	 */
	if (reg == 0)
		return (0);

	if (reg < 16)
		return ((&rp->r_g1)[reg - 1]);

	/*
	 * Before we look at the user's stack, we'll check the register
	 * windows to see if the information we want is in there.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		value = dtrace_getreg_win(reg, 1);
		dtrace_interrupt_enable(cookie);

		atomic_add_64(&fasttrap_getreg_fast_cnt, 1);

		return (value);
	}
	dtrace_interrupt_enable(cookie);

	/*
	 * First check the machpcb structure to see if we've already read
	 * in the register window we're looking for; if we haven't (and
	 * we probably haven't), try to copy in the value of the register.
	 */
	/* LINTED - alignment */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
				return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
			goto err;
	} else {
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		uint32_t *v32 = (uint32_t *)&value;

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
				return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
			goto err;

		v32[0] = 0;
	}

	atomic_add_64(&fasttrap_getreg_slow_cnt, 1);
	return (value);

err:
	/*
	 * If the copy in failed, the process will be in an irrecoverable
	 * state, and we have no choice but to kill it.
	 */
	psignal(ttoproc(curthread), SIGILL);
	return (0);
}
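
/*
 * Note on the indexing above: struct rwindow lays out rw_local[8] followed
 * immediately by rw_in[8], so rw_local[reg - 16] addresses a local for regs
 * 16-23 and an in for regs 24-31; the 16-word window save area at the top
 * of a stack frame has the same layout (%l0-%l7 then %i0-%i7), so the same
 * trick works for fr_local. The overall lookup order is: the live register
 * file if the window is still resident (%otherwin > 0), then any window
 * buffered in the machpcb, and finally the user's stack frame.
 */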

static uint64_t fasttrap_putreg_fast_cnt;
static uint64_t fasttrap_putreg_mpcb_cnt;
static uint64_t fasttrap_putreg_slow_cnt;

static void
fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
{
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern void dtrace_putreg_win(uint_t, ulong_t);

	if (reg == 0)
		return;

	if (reg < 16) {
		(&rp->r_g1)[reg - 1] = value;
		return;
	}

	/*
	 * If the user process is still using some register windows, we
	 * can just place the value in the correct window.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		dtrace_putreg_win(reg, value);
		dtrace_interrupt_enable(cookie);
		atomic_add_64(&fasttrap_putreg_fast_cnt, 1);
		return;
	}
	dtrace_interrupt_enable(cookie);

	/*
	 * First see if there's a copy of the register window in the
	 * machpcb structure that we can modify; if there isn't, try to
	 * copy out the value. If that fails, we try to create a new
	 * register window in the machpcb structure. While this isn't
	 * _precisely_ the intended use of the machpcb structure, it
	 * can't cause any problems since we know at this point in the
	 * code that all of the user's data have been flushed out of the
	 * register file (since %otherwin is 0).
	 */
	/* LINTED - alignment */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		/* LINTED - alignment */
		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;

		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = value;
				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
				return;
			} while (i > 0);
		}

		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
			return;
		}
	} else {
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		/* LINTED - alignment */
		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
		uint32_t v32 = (uint32_t)value;

		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = v32;
				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
				return;
			} while (i > 0);
		}

		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
			return;
		}
	}

	atomic_add_64(&fasttrap_putreg_slow_cnt, 1);
	return;

err:
	/*
	 * If we couldn't record this register's value, the process is in an
	 * irrecoverable state and we have no choice but to euthanize it.
	 */
	psignal(ttoproc(curthread), SIGILL);
}