/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/fasttrap_isa.h>
#include <sys/fasttrap_impl.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <sys/cmn_err.h>
#include <sys/frame.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <sys/trap.h>

#include <v9/sys/machpcb.h>
#include <v9/sys/privregs.h>

/*
 * Lossless User-Land Tracing on SPARC
 * -----------------------------------
 *
 * The Basic Idea
 *
 * The most important design constraint is, of course, correct execution of
 * the user thread above all else. The next most important goal is rapid
 * execution. We combine execution of instructions in user-land with
 * emulation of certain instructions in the kernel to aim for complete
 * correctness and maximal performance.
 *
 * We take advantage of the split PC/NPC architecture to speed up logical
 * single-stepping; when we copy an instruction out to the scratch space in
 * the ulwp_t structure (held in the %g7 register on SPARC), we can
 * effectively single step by setting the PC to our scratch space and leaving
 * the NPC alone. This executes the replaced instruction and then continues
 * on without having to reenter the kernel as with single-stepping. The
 * obvious caveat is for instructions whose execution is PC dependent --
 * branches, call and link instructions (call and jmpl), and the rdpc
 * instruction. These instructions cannot be executed in the manner described
 * so they must be emulated in the kernel.
 *
 * Emulation for this small set of instructions is fairly simple; the most
 * difficult part is emulating branch conditions.
 *
 *
 * A Cache Heavy Portfolio
 *
 * It's important to note at this time that copying an instruction out to the
 * ulwp_t scratch space in user-land is rather complicated. SPARC has
 * separate data and instruction caches so any writes to the D$ (using a
 * store instruction for example) aren't necessarily reflected in the I$.
 * The flush instruction can be used to synchronize the two and must be used
 * for any self-modifying code, but the flush instruction only applies to the
 * primary address space (the absence of a flusha analogue to the flush
 * instruction that accepts an ASI argument is an obvious omission from SPARC
 * v9 where the notion of the alternate address space was introduced on
 * SPARC).
 * To correctly copy out the instruction we must use a block store
 * that doesn't allocate in the D$ and ensures synchronization with the I$;
 * see dtrace_blksuword32() for the implementation (this function uses
 * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
 * described). Refer to the UltraSPARC I/II manual for details on the
 * ASI_BLK_COMMIT_S ASI.
 *
 *
 * Return Subtleties
 *
 * When we're firing a return probe we need to expose the value returned by
 * the function being traced. Since the function can set the return value
 * in its last instruction, we need to fire the return probe only _after_
 * the effects of the instruction are apparent. For instructions that we
 * emulate, we can call dtrace_probe() after we've performed the emulation;
 * for instructions that we execute after we return to user-land, we set
 * %pc to the instruction we copied out (as described above) and set %npc
 * to a trap instruction stashed in the ulwp_t structure. After the traced
 * instruction is executed, the trap instruction returns control to the
 * kernel where we can fire the return probe.
 *
 * This need for a second trap in cases where we execute the traced
 * instruction makes it all the more important to emulate the most common
 * instructions to avoid the second trip in and out of the kernel.
 *
 *
 * Making it Fast
 *
 * Since copying out an instruction is neither simple nor inexpensive for the
 * CPU, we should attempt to avoid doing it in as many cases as possible.
 * Since function entry and return are usually the most interesting probe
 * sites, we attempt to tune the performance of the fasttrap provider around
 * instructions typically in those places.
 *
 * Looking at a bunch of functions in libraries and executables reveals that
 * most functions begin with either a save or a sethi (to set up a larger
 * argument to the save) and end with a restore or an or (in the case of leaf
 * functions). To try to improve performance, we emulate all of these
 * instructions in the kernel.
 *
 * The save and restore instructions are a little tricky since they perform
 * register window manipulation. Rather than trying to tinker with the
 * register windows from the kernel, we emulate the implicit add that takes
 * place as part of those instructions and set the %pc to point to a simple
 * save or restore we've hidden in the ulwp_t structure. If we're in a return
 * probe and want to make it seem as though the tracepoint has been completely
 * executed, we need to remember that we've pulled this trick with restore and
 * pull registers from the previous window (the one that we'll switch to once
 * the simple restore instruction is executed) rather than the current one.
 * This is why in the case of emulating a restore we set the DTrace CPU flag
 * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
 * (see fasttrap_return_common()).
 */
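
/*
 * As a worked example of the field extraction done by the macros below,
 * take the instruction word 0x9de3bf50, which encodes save %sp, -176, %sp
 * (a typical function prologue):
 *
 *	OP(0x9de3bf50)  = 2	(arithmetic/miscellaneous format)
 *	OP3(0x9de3bf50) = 0x3c	(OP3_SAVE)
 *	RD(0x9de3bf50)  = 14	(%sp)
 *	RS1(0x9de3bf50) = 14	(%sp)
 *	I(0x9de3bf50)   = 1	(immediate form)
 *
 * The low 13 bits hold simm13 = 0x1f50; shifting left by 19 and then
 * arithmetically right by 19 (the idiom used throughout this file)
 * sign-extends it to -176.
 */
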
#define	OP(x)		((x) >> 30)
#define	OP2(x)		(((x) >> 22) & 0x07)
#define	OP3(x)		(((x) >> 19) & 0x3f)
#define	RCOND(x)	(((x) >> 25) & 0x07)
#define	COND(x)		(((x) >> 25) & 0x0f)
#define	A(x)		(((x) >> 29) & 0x01)
#define	I(x)		(((x) >> 13) & 0x01)
#define	RD(x)		(((x) >> 25) & 0x1f)
#define	RS1(x)		(((x) >> 14) & 0x1f)
#define	RS2(x)		(((x) >> 0) & 0x1f)
#define	CC(x)		(((x) >> 20) & 0x03)
#define	DISP16(x)	((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
#define	DISP22(x)	((x) & 0x3fffff)
#define	DISP19(x)	((x) & 0x7ffff)
#define	DISP30(x)	((x) & 0x3fffffff)
#define	SW_TRAP(x)	((x) & 0x7f)

#define	OP3_OR		0x02
#define	OP3_RD		0x28
#define	OP3_JMPL	0x38
#define	OP3_RETURN	0x39
#define	OP3_TCC		0x3a
#define	OP3_SAVE	0x3c
#define	OP3_RESTORE	0x3d

#define	OP3_PREFETCH	0x2d
#define	OP3_CASA	0x3c
#define	OP3_PREFETCHA	0x3d
#define	OP3_CASXA	0x3e

#define	OP2_ILLTRAP	0x0
#define	OP2_BPcc	0x1
#define	OP2_Bicc	0x2
#define	OP2_BPr		0x3
#define	OP2_SETHI	0x4
#define	OP2_FBPfcc	0x5
#define	OP2_FBfcc	0x6

#define	R_G0		0
#define	R_O0		8
#define	R_SP		14
#define	R_I0		24
#define	R_I1		25
#define	R_I2		26
#define	R_I3		27

/*
 * Check the comment in fasttrap.h when changing these offsets or adding
 * new instructions.
 */
#define	FASTTRAP_OFF_SAVE	64
#define	FASTTRAP_OFF_RESTORE	68
#define	FASTTRAP_OFF_FTRET	72
#define	FASTTRAP_OFF_RETURN	76

#define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */
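
/*
 * Decoding BREAKPOINT_INSTR with the macros above shows why it reads as
 * "ta 1": OP(0x91d02001) = 2 and OP3(0x91d02001) = 0x3a (OP3_TCC), the
 * condition field (bits 29:25) is 8 (always), I(0x91d02001) = 1, and
 * SW_TRAP(0x91d02001) = 1 -- a trap-always instruction with software
 * trap number 1.
 */
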
/*
 * Tunable to let users turn off the fancy save instruction optimization.
 * If a program is non-ABI compliant, there's a possibility that the save
 * instruction optimization could cause an error.
 */
int fasttrap_optimize_save = 1;

static uint64_t
fasttrap_anarg(struct regs *rp, int argno)
{
	uint64_t value;

	if (argno < 6)
		return ((&rp->r_o0)[argno]);

	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fulword(&fr->fr_argd[argno]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
		    CPU_DTRACE_BADALIGN);
	} else {
		struct frame32 *fr = (struct frame32 *)rp->r_sp;

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fuword32(&fr->fr_argd[argno]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
		    CPU_DTRACE_BADALIGN);
	}

	return (value);
}

static ulong_t fasttrap_getreg(struct regs *, uint_t);
static void fasttrap_putreg(struct regs *, uint_t, ulong_t);

int
fasttrap_probe(struct regs *rp)
{
	dtrace_probe(fasttrap_probe_id,
	    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3, rp->r_o4);

	rp->r_pc = rp->r_npc;
	rp->r_npc = rp->r_pc + 4;

	return (0);
}

static void
fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp, int argc,
    uintptr_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);

	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		uintptr_t v;

		for (i = 0; i < cap; i++) {
			x = probe->ftp_argmap[i];

			if (x < 6)
				argv[i] = (&rp->r_o0)[x];
			else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
				argv[i] = 0;
			else
				argv[i] = v;
		}

	} else {
		struct frame32 *fr = (struct frame32 *)rp->r_sp;
		uint32_t v;

		for (i = 0; i < cap; i++) {
			x = probe->ftp_argmap[i];

			if (x < 6)
				argv[i] = (&rp->r_o0)[x];
			else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
				argv[i] = 0;
			else
				argv[i] = v;
		}
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}
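
/*
 * To illustrate the argument mapping above: per the SPARC calling
 * convention, arguments 0 through 5 arrive in %o0-%o5 (contiguous with
 * %o0 in the saved registers, hence the (&rp->r_o0)[x] indexing), while
 * later arguments are read from the argument dump area (fr_argd) of the
 * caller's frame. So for a hypothetical probe whose argmap is {2, 0, 6},
 * probe arg0 comes from %o2, arg1 from %o0, and arg2 from fr_argd[6].
 */
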
static void
fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
    uint_t fake_restore)
{
	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
	kmutex_t *pid_mtx;
	dtrace_icookie_t cookie;

	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    !tp->ftt_proc->ftpc_defunct)
			break;
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
	 */
	if (tp == NULL || tp->ftt_retids == NULL) {
		mutex_exit(pid_mtx);
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;

		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
			if (probe->ftp_argmap == NULL) {
				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
				    rp->r_o2, rp->r_o3, rp->r_o4);
			} else {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp,
				    sizeof (t) / sizeof (t[0]), t);

				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);
			}
			continue;
		}

		/*
		 * If this is only a possible return point, we must
		 * be looking at a potential tail call in leaf context.
		 * If the %npc is still within this function, then we
		 * must have misidentified a jmpl as a tail-call when it
		 * is, in fact, part of a jump table. It would be nice to
		 * remove this tracepoint, but this is neither the time
		 * nor the place.
		 */
		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * It's possible for a function to branch to the delay slot
		 * of an instruction that we've identified as a return site.
		 * We can detect this spurious return probe activation by
		 * observing that in this case %npc will be %pc + 4 and %npc
		 * will be inside the current function (unless the user is
		 * doing _crazy_ instruction picking in which case there's
		 * very little we can do). The second check is important
		 * in case the last instructions of a function make a
		 * tail-call to the function located immediately following.
		 */
		if (rp->r_npc == rp->r_pc + 4 &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * The first argument is the offset of the return tracepoint
		 * in the function; the remaining arguments are the return
		 * values.
		 *
		 * If fake_restore is set, we need to pull the return values
		 * out of the %i's rather than the %o's -- a little trickier.
		 */
		if (!fake_restore) {
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
		} else {
			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);

			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    arg0, arg1, arg2, arg3);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
			dtrace_interrupt_enable(cookie);
		}
	}

	mutex_exit(pid_mtx);
}
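
/*
 * A concrete illustration of the two guards above: a return-site
 * tracepoint typically sits in the delay slot of the control transfer,
 * so when it's reached via the transfer itself, %npc already points
 * outside the function (at the return address or tail-call target) and
 * the probe fires. If instead %npc lands back inside
 * [ftp_faddr, ftp_faddr + ftp_fsize) -- say a jmpl that turned out to be
 * a jump-table dispatch, or a branch straight into the delay slot, in
 * which case %npc == %pc + 4 -- the activation is spurious and skipped.
 */
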
int
fasttrap_pid_probe(struct regs *rp)
{
	proc_t *p = curproc;
	fasttrap_tracepoint_t *tp, tp_local;
	fasttrap_id_t *id;
	pid_t pid;
	uintptr_t pc = rp->r_pc;
	uintptr_t npc = rp->r_npc;
	uintptr_t orig_pc = pc;
	fasttrap_bucket_t *bucket;
	kmutex_t *pid_mtx;
	uint_t fake_restore = 0, is_enabled = 0;
	dtrace_icookie_t cookie;

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (curthread->t_dtrace_step) {
		ASSERT(curthread->t_dtrace_on);
		fasttrap_sigtrap(p, curthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	curthread->t_dtrace_ft = 0;
	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	pid = p->p_pid;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Lookup the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    !tp->ftt_proc->ftpc_defunct)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
		mutex_exit(pid_mtx);
		return (-1);
	}

	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;
		int isentry = (id->fti_ptype == DTFTP_ENTRY);

		if (id->fti_ptype == DTFTP_IS_ENABLED) {
			is_enabled = 1;
			continue;
		}

		/*
		 * We note that this was an entry probe to help ustack() find
		 * the first caller.
		 */
		if (isentry) {
			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
		}
		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
		    rp->r_o3, rp->r_o4);
		if (isentry) {
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
			dtrace_interrupt_enable(cookie);
		}
	}

	/*
	 * We're about to do a bunch of work so we cache a local copy of
	 * the tracepoint to emulate the instruction, and then find the
	 * tracepoint again later if we need to light up any return probes.
	 */
	tp_local = *tp;
	mutex_exit(pid_mtx);
	tp = &tp_local;

	/*
	 * If there's an is-enabled probe connected to this tracepoint it
	 * means that there was a 'mov %g0, %o0' instruction that was placed
	 * there by DTrace when the binary was linked. As this probe is, in
	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
	 * bypass all the instruction emulation logic since we know the
	 * inevitable result. It's possible that a user could construct a
	 * scenario where the 'is-enabled' probe was on some other
	 * instruction, but that would be a rather exotic way to shoot oneself
	 * in the foot.
	 */
	if (is_enabled) {
		rp->r_o0 = 1;
		pc = rp->r_npc;
		npc = pc + 4;
		goto done;
	}

	/*
	 * We emulate certain types of instructions to ensure correctness
	 * (in the case of position-dependent instructions) or to optimize
	 * common cases. The rest we have the thread execute back in
	 * user-land.
	 */
	switch (tp->ftt_type) {
	case FASTTRAP_T_SAVE:
	{
		int32_t imm;

		/*
		 * This is an optimization to let us handle function entry
		 * probes more efficiently. Many functions begin with a save
		 * instruction that follows the pattern:
		 *	save	%sp, <imm>, %sp
		 *
		 * Meanwhile, we've stashed the instruction:
		 *	save	%g1, %g0, %sp
		 *
		 * off of %g7, so all we have to do is stick the right value
		 * into %g1 and reset %pc to point to the instruction we've
		 * cleverly hidden (%npc should not be touched).
		 */

		imm = tp->ftt_instr << 19;
		imm >>= 19;
		rp->r_g1 = rp->r_sp + imm;
		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
		break;
	}
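
	/*
	 * Worked example of the save optimization: for the prologue
	 * instruction save %sp, -176, %sp (0x9de3bf50), the sign-extended
	 * simm13 is -176, so we set %g1 = %sp - 176 and point %pc at the
	 * hidden "save %g1, %g0, %sp". Executing that stashed save performs
	 * the identical window switch and stack adjustment the original
	 * instruction would have, without the kernel touching the register
	 * windows itself.
	 */
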
	case FASTTRAP_T_RESTORE:
	{
		ulong_t value;
		uint_t rd;

		/*
		 * This is an optimization to let us handle function
		 * return probes more efficiently. Most non-leaf functions
		 * end with the sequence:
		 *	ret
		 *	restore	<reg>, <reg_or_imm>, %oX
		 *
		 * We've stashed the instruction:
		 *	restore	%g0, %g0, %g0
		 *
		 * off of %g7 so we just need to place the correct value
		 * in the right %i register (since after our fake-o
		 * restore, the %i's will become the %o's) and set the %pc
		 * to point to our hidden restore. We also set fake_restore to
		 * let fasttrap_return_common() know that it will find the
		 * return values in the %i's rather than the %o's.
		 */

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		/*
		 * Convert %o's to %i's; leave %g's as they are.
		 */
		rd = RD(tp->ftt_instr);
		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);

		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
		fake_restore = 1;
		break;
	}
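
	/*
	 * For example, a non-leaf epilogue ending in
	 *	ret
	 *	restore	%g0, %o1, %o0
	 * computes value = %o1 and has rd = 8 (%o0); since (8 & 0x18) == 0x8,
	 * the value is written to register 24 (%i0) instead. Once the hidden
	 * restore executes and the window rotates, that %i0 becomes the
	 * caller's %o0, exactly as the original restore would have arranged.
	 */
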
	case FASTTRAP_T_RETURN:
	{
		uintptr_t target;

		/*
		 * A return instruction is like a jmpl (without the link
		 * part) that executes an implicit restore. We've stashed
		 * the instruction:
		 *	return	%o0
		 *
		 * off of %g7 so we just need to place the target in %o0
		 * and set the %pc to point to the stashed return instruction.
		 * We use %o0 since that register disappears after the return
		 * executes, erasing any evidence of this tampering.
		 */
		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, R_O0, target);

		pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_OR:
	{
		ulong_t value;

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, RD(tp->ftt_instr), value);
		pc = rp->r_npc;
		npc = pc + 4;
		break;
	}

	case FASTTRAP_T_SETHI:
		if (RD(tp->ftt_instr) != R_G0) {
			uint32_t imm32 = tp->ftt_instr << 10;
			fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
		}
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CCR:
	{
		uint_t c, v, z, n, taken;
		uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;

		if (tp->ftt_cc != 0)
			ccr >>= 4;

		c = (ccr >> 0) & 1;
		v = (ccr >> 1) & 1;
		z = (ccr >> 2) & 1;
		n = (ccr >> 3) & 1;

		switch (tp->ftt_code) {
		case 0x0:	/* BN */
			taken = 0;		break;
		case 0x1:	/* BE */
			taken = z;		break;
		case 0x2:	/* BLE */
			taken = z | (n ^ v);	break;
		case 0x3:	/* BL */
			taken = n ^ v;		break;
		case 0x4:	/* BLEU */
			taken = c | z;		break;
		case 0x5:	/* BCS (BLU) */
			taken = c;		break;
		case 0x6:	/* BNEG */
			taken = n;		break;
		case 0x7:	/* BVS */
			taken = v;		break;
		case 0x8:	/* BA */
			/*
			 * We handle the BA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = 1;		break;
		case 0x9:	/* BNE */
			taken = ~z;		break;
		case 0xa:	/* BG */
			taken = ~(z | (n ^ v));	break;
		case 0xb:	/* BGE */
			taken = ~(n ^ v);	break;
		case 0xc:	/* BGU */
			taken = ~(c | z);	break;
		case 0xd:	/* BCC (BGEU) */
			taken = ~c;		break;
		case 0xe:	/* BPOS */
			taken = ~n;		break;
		case 0xf:	/* BVC */
			taken = ~v;		break;
		}

		if (taken & 1) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_FCC:
	{
		uint_t fcc;
		uint_t taken;
		uint64_t fsr;

		dtrace_getfsr(&fsr);

		if (tp->ftt_cc == 0) {
			fcc = (fsr >> 10) & 0x3;
		} else {
			uint_t shift;
			ASSERT(tp->ftt_cc <= 3);
			shift = 30 + tp->ftt_cc * 2;
			fcc = (fsr >> shift) & 0x3;
		}

		switch (tp->ftt_code) {
		case 0x0:	/* FBN */
			taken = (1 << fcc) & (0|0|0|0);	break;
		case 0x1:	/* FBNE */
			taken = (1 << fcc) & (8|4|2|0);	break;
		case 0x2:	/* FBLG */
			taken = (1 << fcc) & (0|4|2|0);	break;
		case 0x3:	/* FBUL */
			taken = (1 << fcc) & (8|0|2|0);	break;
		case 0x4:	/* FBL */
			taken = (1 << fcc) & (0|0|2|0);	break;
		case 0x5:	/* FBUG */
			taken = (1 << fcc) & (8|4|0|0);	break;
		case 0x6:	/* FBG */
			taken = (1 << fcc) & (0|4|0|0);	break;
		case 0x7:	/* FBU */
			taken = (1 << fcc) & (8|0|0|0);	break;
		case 0x8:	/* FBA */
			/*
			 * We handle the FBA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = (1 << fcc) & (8|4|2|1);	break;
		case 0x9:	/* FBE */
			taken = (1 << fcc) & (0|0|0|1);	break;
		case 0xa:	/* FBUE */
			taken = (1 << fcc) & (8|0|0|1);	break;
		case 0xb:	/* FBGE */
			taken = (1 << fcc) & (0|4|0|1);	break;
		case 0xc:	/* FBUGE */
			taken = (1 << fcc) & (8|4|0|1);	break;
		case 0xd:	/* FBLE */
			taken = (1 << fcc) & (0|0|2|1);	break;
		case 0xe:	/* FBULE */
			taken = (1 << fcc) & (8|0|2|1);	break;
		case 0xf:	/* FBO */
			taken = (1 << fcc) & (0|4|2|1);	break;
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}
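
	/*
	 * The masks above are written as (U|G|L|E) membership sets: the FSR
	 * fcc field is 0 for equal, 1 for less, 2 for greater, and 3 for
	 * unordered, so (1 << fcc) selects one of the low four bits (1, 2,
	 * 4, or 8). FBGE, for instance, uses (0|4|0|1) = 5, which has bits
	 * 0 (equal) and 2 (greater) set: the branch is taken exactly when
	 * the comparison was equal or greater, and not when it was less or
	 * unordered.
	 */
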
	case FASTTRAP_T_REG:
	{
		int64_t value;
		uint_t taken;
		uint_t reg = RS1(tp->ftt_instr);

		/*
		 * An ILP32 process shouldn't be using a branch predicated on
		 * an %i or an %l since it would violate the ABI. It's a
		 * violation of the ABI because we can't ensure deterministic
		 * behavior. We should have identified this case when we
		 * enabled the probe.
		 */
		ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);

		value = (int64_t)fasttrap_getreg(rp, reg);

		switch (tp->ftt_code) {
		case 0x1:	/* BRZ */
			taken = (value == 0);	break;
		case 0x2:	/* BRLEZ */
			taken = (value <= 0);	break;
		case 0x3:	/* BRLZ */
			taken = (value < 0);	break;
		case 0x5:	/* BRNZ */
			taken = (value != 0);	break;
		case 0x6:	/* BRGZ */
			taken = (value > 0);	break;
		case 0x7:	/* BRGEZ */
			taken = (value >= 0);	break;
		default:
		case 0x0:
		case 0x4:
			panic("fasttrap: mishandled a branch");
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_ALWAYS:
		/*
		 * BAs, BA,As...
		 */

		if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Annulled branch always instructions never execute
			 * the instruction in the delay slot.
			 */
			pc = tp->ftt_dest;
			npc = tp->ftt_dest + 4;
		} else {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		}
		break;

	case FASTTRAP_T_RDPC:
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CALL:
		/*
		 * It's a call _and_ link remember...
		 */
		rp->r_o7 = rp->r_pc;
		pc = rp->r_npc;
		npc = tp->ftt_dest;
		break;

	case FASTTRAP_T_JMPL:
		pc = rp->r_npc;

		if (I(tp->ftt_instr)) {
			uint_t rs1 = RS1(tp->ftt_instr);
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			npc = fasttrap_getreg(rp, rs1) + imm;
		} else {
			uint_t rs1 = RS1(tp->ftt_instr);
			uint_t rs2 = RS2(tp->ftt_instr);

			npc = fasttrap_getreg(rp, rs1) +
			    fasttrap_getreg(rp, rs2);
		}

		/*
		 * Do the link part of the jump-and-link instruction.
		 */
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);

		break;
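
	/*
	 * The jmpl emulation covers the common leaf-return idiom retl,
	 * which assembles to jmpl %o7 + 8, %g0: rs1 is %o7, the
	 * sign-extended immediate is 8, so npc = %o7 + 8 (the instruction
	 * after the call's delay slot), and the "link" write of %pc to
	 * rd = %g0 is harmlessly discarded by fasttrap_putreg().
	 */
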
	case FASTTRAP_T_COMMON:
	{
		curthread->t_dtrace_scrpc = rp->r_g7;
		curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;

		/*
		 * Copy the instruction to a reserved location in the
		 * user-land thread structure, then set the PC to that
		 * location and leave the NPC alone. We take pains to ensure
		 * consistency in the instruction stream (See SPARC
		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1
		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
		 * instruction into the user's address space without
		 * bypassing the I$. There's no AS_USER version of this ASI
		 * (as exists for other ASIs) so we use the lofault
		 * mechanism to catch faults.
		 */
		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
			/*
			 * If the copyout fails, then the process's state
			 * is not consistent (the effects of the traced
			 * instruction will never be seen). This process
			 * cannot be allowed to continue execution.
			 */
			fasttrap_sigtrap(curproc, curthread, pc);
			return (0);
		}

		curthread->t_dtrace_pc = pc;
		curthread->t_dtrace_npc = npc;
		curthread->t_dtrace_on = 1;

		pc = curthread->t_dtrace_scrpc;

		if (tp->ftt_retids != NULL) {
			curthread->t_dtrace_step = 1;
			curthread->t_dtrace_ret = 1;
			npc = curthread->t_dtrace_astpc;
		}
		break;
	}

	default:
		panic("fasttrap: mishandled an instruction");
	}

	/*
	 * This bit me in the ass a couple of times, so let's toss this
	 * in as a cursory sanity check.
	 */
	ASSERT(pc != rp->r_g7 + 4);
	ASSERT(pc != rp->r_g7 + 8);

done:
	/*
	 * If there were no return probes when we first found the tracepoint,
	 * we should feel no obligation to honor any return probes that were
	 * subsequently enabled -- they'll just have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until we return to the kernel.
		 */
		if (tp->ftt_type != FASTTRAP_T_COMMON) {
			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
		} else {
			ASSERT(curthread->t_dtrace_ret != 0);
			ASSERT(curthread->t_dtrace_pc == orig_pc);
			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
			ASSERT(npc == curthread->t_dtrace_astpc);
		}
	}

	ASSERT(pc != 0);
	rp->r_pc = pc;
	rp->r_npc = npc;

	return (0);
}
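
/*
 * To sketch the FASTTRAP_T_COMMON flow above with the offsets defined
 * earlier: the copied instruction lands at %g7 (t_dtrace_scrpc) and the
 * trap instruction sits at %g7 + FASTTRAP_OFF_FTRET (t_dtrace_astpc).
 * With no return probes we resume at
 *
 *	%pc = scrpc, %npc = original %npc
 *
 * so the copied instruction executes and control continues in user-land.
 * With return probes we instead resume at
 *
 *	%pc = scrpc, %npc = astpc
 *
 * so the copied instruction executes and then traps straight back into
 * the kernel, where fasttrap_return_probe() can fire the return probes.
 */
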
int
fasttrap_return_probe(struct regs *rp)
{
	proc_t *p = ttoproc(curthread);
	pid_t pid;
	uintptr_t pc = curthread->t_dtrace_pc;
	uintptr_t npc = curthread->t_dtrace_npc;

	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	/*
	 * We set the %pc and %npc to their values when the traced
	 * instruction was initially executed so that it appears to
	 * dtrace_probe() that we're on the original instruction, and so that
	 * the user can't easily detect our complex web of lies.
	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
	 * after we return.
	 */
	rp->r_pc = pc;
	rp->r_npc = npc;

	pid = p->p_pid;
	fasttrap_return_common(rp, pc, pid, 0);

	return (0);
}

int
fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr = FASTTRAP_INSTR;

	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr;

	/*
	 * Distinguish between read or write failures and a changed
	 * instruction.
	 */
	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
		return (0);
	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
		return (0);
	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
    fasttrap_probe_type_t type)
{
	uint32_t instr;
	int32_t disp;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set.
	 */
	if (uread(p, &instr, 4, pc) != 0)
		return (-1);

	/*
	 * Decode the instruction to fill in the probe flags. We can have
	 * the process execute most instructions on its own using a pc/npc
	 * trick, but pc-relative control transfers present a problem since
	 * we're relocating the instruction. We emulate these instructions
	 * in the kernel. We assume a default type and overwrite that as
	 * needed.
	 *
	 * pc-relative instructions must be emulated for correctness;
	 * other instructions (which represent a large set of commonly traced
	 * instructions) are emulated or otherwise optimized for performance.
	 */
	tp->ftt_type = FASTTRAP_T_COMMON;
	if (OP(instr) == 1) {
		/*
		 * Call instructions.
		 */
		tp->ftt_type = FASTTRAP_T_CALL;
		disp = DISP30(instr) << 2;
		tp->ftt_dest = pc + (intptr_t)disp;

	} else if (OP(instr) == 0) {
		/*
		 * Branch instructions.
		 *
		 * Unconditional branches need careful attention when they're
		 * annulled: annulled unconditional branches never execute
		 * the instruction in the delay slot.
		 */
		switch (OP2(instr)) {
		case OP2_ILLTRAP:
		case 0x7:
			/*
			 * The compiler may place an illtrap after a call to
			 * a function that returns a structure. In the case of
			 * a returned structure, the compiler places an illtrap
			 * whose const22 field is the size of the returned
			 * structure immediately following the delay slot of
			 * the call. To stay out of the way, we refuse to
			 * place tracepoints on top of illtrap instructions.
			 *
			 * This is one of the dumbest architectural decisions
			 * I've ever had to work around.
			 *
			 * We also identify the only illegal op2 value (See
			 * SPARC Architecture Manual Version 9, E.2 table 31).
			 */
			return (-1);

		case OP2_BPcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				/*
				 * Check for an illegal instruction.
				 */
				if (CC(instr) & 1)
					return (-1);
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;
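
		/*
		 * The shift pair above both scales and sign-extends the
		 * 19-bit word displacement: for a branch back by two
		 * instructions, disp19 is 0x7fffe, so disp << 13 gives
		 * 0xffffc000 (-16384) and the arithmetic shift right by 11
		 * yields -8, i.e. dest = pc - 8. The DISP22 variant below
		 * uses shifts of 10 and 8 in the same way.
		 */
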
		case OP2_Bicc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_BPr:
			/*
			 * Check for an illegal instruction.
			 */
			if ((RCOND(instr) & 3) == 0)
				return (-1);

			/*
			 * It's a violation of the v8plus ABI to use a
			 * register-predicated branch in a 32-bit app if
			 * the register used is an %l or an %i (%gs and %os
			 * are legit because they're not saved to the stack
			 * in 32-bit words when we take a trap).
			 */
			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
				return (-1);

			tp->ftt_type = FASTTRAP_T_REG;
			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;
			disp = DISP16(instr);
			disp <<= 16;
			disp >>= 14;
			tp->ftt_dest = pc + (intptr_t)disp;
			tp->ftt_code = RCOND(instr);
			break;
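
		/*
		 * DISP16 reassembles a displacement that the BPr format
		 * stores in two pieces: bits 21:20 of the instruction hold
		 * d16hi and bits 13:0 hold d16lo. For a brz targeting two
		 * instructions back, the reassembled disp is 0xfffe;
		 * disp << 16 gives 0xfffe0000 (-131072) and the arithmetic
		 * shift right by 14 yields -8.
		 */
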
		case OP2_SETHI:
			tp->ftt_type = FASTTRAP_T_SETHI;
			break;

		case OP2_FBPfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_FBfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;
		}

	} else if (OP(instr) == 2) {
		switch (OP3(instr)) {
		case OP3_RETURN:
			tp->ftt_type = FASTTRAP_T_RETURN;
			break;

		case OP3_JMPL:
			tp->ftt_type = FASTTRAP_T_JMPL;
			break;

		case OP3_RD:
			if (RS1(instr) == 5)
				tp->ftt_type = FASTTRAP_T_RDPC;
			break;

		case OP3_SAVE:
			/*
			 * We optimize for save instructions at function
			 * entry; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_SAVE) for details.
			 */
			if (fasttrap_optimize_save != 0 &&
			    type == DTFTP_ENTRY &&
			    I(instr) == 1 && RD(instr) == R_SP)
				tp->ftt_type = FASTTRAP_T_SAVE;
			break;

		case OP3_RESTORE:
			/*
			 * We optimize restore instructions at function
			 * return; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_RESTORE) for details.
			 *
			 * rd must be an %o or %g register.
			 */
			if ((RD(instr) & 0x10) == 0)
				tp->ftt_type = FASTTRAP_T_RESTORE;
			break;

		case OP3_OR:
			/*
			 * A large proportion of instructions in the delay
			 * slot of retl instructions are or's so we emulate
			 * these downstairs as an optimization.
			 */
			tp->ftt_type = FASTTRAP_T_OR;
			break;

		case OP3_TCC:
			/*
			 * Breakpoint instructions are effectively position-
			 * dependent since the debugger uses the %pc value
			 * to look up which breakpoint was executed. As a
			 * result, we can't actually instrument breakpoints.
			 */
			if (SW_TRAP(instr) == ST_BREAKPOINT)
				return (-1);
			break;

		case 0x19:
		case 0x1d:
		case 0x29:
		case 0x33:
		case 0x3f:
			/*
			 * Identify illegal instructions (See SPARC
			 * Architecture Manual Version 9, E.2 table 32).
			 */
			return (-1);
		}
	} else if (OP(instr) == 3) {
		uint32_t op3 = OP3(instr);

		/*
		 * Identify illegal instructions (See SPARC Architecture
		 * Manual Version 9, E.2 table 33).
		 */
		if ((op3 & 0x28) == 0x28) {
			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
				return (-1);
		} else {
			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
				return (-1);
		}
	}

	tp->ftt_instr = instr;

	/*
	 * We don't know how this tracepoint is going to be used, but in case
	 * it's used as part of a function return probe, we need to indicate
	 * whether it's always a return site or only potentially a return
	 * site. If it's part of a return probe, it's always going to be a
	 * return from that function if it's a restore instruction or if
	 * the previous instruction was a return. If we could reliably
	 * distinguish jump tables from return sites, this wouldn't be
	 * necessary.
	 */
	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;

	return (0);
}

/*ARGSUSED*/
uint64_t
fasttrap_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
}

/*ARGSUSED*/
uint64_t
fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
}
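
/*
 * Both getarg entry points rely on the layout of struct regs, as does the
 * register access below: %g1 through %g7 and %o0 through %o7 (registers 1
 * through 15) are stored contiguously starting at r_g1, which is what
 * makes indexing expressions like (&rp->r_g1)[reg - 1] and
 * (&rp->r_o0)[argno] valid. For example, reg = 8 (%o0) resolves to
 * (&rp->r_g1)[7], the same field as rp->r_o0.
 */
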
static uint64_t fasttrap_getreg_fast_cnt;
static uint64_t fasttrap_getreg_mpcb_cnt;
static uint64_t fasttrap_getreg_slow_cnt;

static ulong_t
fasttrap_getreg(struct regs *rp, uint_t reg)
{
	ulong_t value;
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern ulong_t dtrace_getreg_win(uint_t, uint_t);

	/*
	 * We have the %os and %gs in our struct regs, but if we need to
	 * snag a %l or %i we need to go scrounging around in the process's
	 * address space.
	 */
	if (reg == 0)
		return (0);

	if (reg < 16)
		return ((&rp->r_g1)[reg - 1]);

	/*
	 * Before we look at the user's stack, we'll check the register
	 * windows to see if the information we want is in there.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		value = dtrace_getreg_win(reg, 1);
		dtrace_interrupt_enable(cookie);

		atomic_add_64(&fasttrap_getreg_fast_cnt, 1);

		return (value);
	}
	dtrace_interrupt_enable(cookie);

	/*
	 * First check the machpcb structure to see if we've already read
	 * in the register window we're looking for; if we haven't (and
	 * we probably haven't), try to copy in the value of the register.
	 */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
				return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
			goto err;
	} else {
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		uint32_t *v32 = (uint32_t *)&value;

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
				return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
			goto err;

		v32[0] = 0;
	}

	atomic_add_64(&fasttrap_getreg_slow_cnt, 1);
	return (value);

err:
	/*
	 * If the copy in failed, the process will be in an irrecoverable
	 * state, and we have no choice but to kill it.
	 */
	psignal(ttoproc(curthread), SIGILL);
	return (0);
}
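
/*
 * The v32 indexing in the ILP32 path above depends on SPARC being
 * big-endian: treating the 64-bit value as two 32-bit words, v32[0] is
 * the high half and v32[1] the low half, so the 32-bit register image is
 * read into v32[1] and v32[0] is zeroed to zero-extend the result.
 */
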
static uint64_t fasttrap_putreg_fast_cnt;
static uint64_t fasttrap_putreg_mpcb_cnt;
static uint64_t fasttrap_putreg_slow_cnt;

static void
fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
{
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern void dtrace_putreg_win(uint_t, ulong_t);

	if (reg == 0)
		return;

	if (reg < 16) {
		(&rp->r_g1)[reg - 1] = value;
		return;
	}

	/*
	 * If the user process is still using some register windows, we
	 * can just place the value in the correct window.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		dtrace_putreg_win(reg, value);
		dtrace_interrupt_enable(cookie);
		atomic_add_64(&fasttrap_putreg_fast_cnt, 1);
		return;
	}
	dtrace_interrupt_enable(cookie);

	/*
	 * First see if there's a copy of the register window in the
	 * machpcb structure that we can modify; if there isn't, try to
	 * copy out the value. If that fails, we try to create a new
	 * register window in the machpcb structure. While this isn't
	 * _precisely_ the intended use of the machpcb structure, it
	 * can't cause any problems since we know at this point in the
	 * code that all of the user's data have been flushed out of the
	 * register file (since %otherwin is 0).
	 */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;

		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = value;
				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
				return;
			} while (i > 0);
		}

		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
			return;
		}
	} else {
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
		uint32_t v32 = (uint32_t)value;

		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = v32;
				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
				return;
			} while (i > 0);
		}

		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
			return;
		}
	}

	atomic_add_64(&fasttrap_putreg_slow_cnt, 1);
	return;

err:
	/*
	 * If we couldn't record this register's value, the process is in an
	 * irrecoverable state and we have no choice but to euthanize it.
	 */
	psignal(ttoproc(curthread), SIGILL);
}