/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/fasttrap_isa.h>
#include <sys/fasttrap_impl.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <sys/cmn_err.h>
#include <sys/frame.h>
#include <sys/stack.h>
#include <sys/sysmacros.h>
#include <sys/trap.h>

#include <v9/sys/machpcb.h>
#include <v9/sys/privregs.h>

/*
 * Lossless User-Land Tracing on SPARC
 * -----------------------------------
 *
 * The Basic Idea
 *
 * The most important design constraint is, of course, correct execution of
 * the user thread above all else. The next most important goal is rapid
 * execution. We combine execution of instructions in user-land with
 * emulation of certain instructions in the kernel to aim for complete
 * correctness and maximal performance.
 *
 * We take advantage of the split PC/NPC architecture to speed up logical
 * single-stepping; when we copy an instruction out to the scratch space in
 * the ulwp_t structure (held in the %g7 register on SPARC), we can
 * effectively single step by setting the PC to our scratch space and leaving
 * the NPC alone. This executes the replaced instruction and then continues
 * on without having to reenter the kernel as with single-stepping. (If the
 * traced instruction at address A is, say, an add, we run the copy at
 * scratch address S with %npc still A + 4; the add executes at S and
 * control simply falls through to A + 4.) The obvious caveat is for
 * instructions whose execution is PC dependent -- branches, call and link
 * instructions (call and jmpl), and the rdpc instruction. These
 * instructions cannot be executed in the manner described so they must be
 * emulated in the kernel.
 *
 * Emulation for this small set of instructions is fairly simple; the most
 * difficult part is emulating branch conditions.
 *
 *
 * A Cache Heavy Portfolio
 *
 * It's important to note at this time that copying an instruction out to the
 * ulwp_t scratch space in user-land is rather complicated. SPARC has
 * separate data and instruction caches so any writes to the D$ (using a
 * store instruction for example) aren't necessarily reflected in the I$.
 * The flush instruction can be used to synchronize the two and must be used
 * for any self-modifying code, but the flush instruction only applies to the
 * primary address space (the absence of a flusha analogue to the flush
 * instruction that accepts an ASI argument is an obvious omission from SPARC
 * v9 where the notion of the alternate address space was introduced on
 * SPARC). To correctly copy out the instruction we must use a block store
 * that doesn't allocate in the D$ and ensures synchronization with the I$;
 * see dtrace_blksuword32() for the implementation (this function uses
 * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
 * described). Refer to the UltraSPARC I/II manual for details on the
 * ASI_BLK_COMMIT_S ASI.
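 *
 * To illustrate the hazard (a sketch with hypothetical registers, not code
 * from this implementation), ordinary self-modifying code on SPARC must
 * pair each store of an instruction with a flush of that address before
 * executing it:
 *
 *	st	%o1, [%o0]	! write the new instruction to memory
 *	flush	%o0		! make the I$ consistent with the D$
 *
 * Since flush only operates on the primary address space, the kernel can't
 * use this idiom on the user's behalf, which is why the committing block
 * store in dtrace_blksuword32() is used instead.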
 *
 *
 * Return Subtleties
 *
 * When we're firing a return probe we need to expose the value returned by
 * the function being traced. Since the function can set the return value
 * in its last instruction, we need to fire the return probe only _after_
 * the effects of the instruction are apparent. For instructions that we
 * emulate, we can call dtrace_probe() after we've performed the emulation;
 * for instructions that we execute after we return to user-land, we set
 * %pc to the instruction we copied out (as described above) and set %npc
 * to a trap instruction stashed in the ulwp_t structure. After the traced
 * instruction is executed, the trap instruction returns control to the
 * kernel where we can fire the return probe.
 *
 * This need for a second trap in cases where we execute the traced
 * instruction makes it all the more important to emulate the most common
 * instructions to avoid the second trip in and out of the kernel.
 *
 *
 * Making it Fast
 *
 * Since copying out an instruction is neither simple nor inexpensive for the
 * CPU, we should attempt to avoid doing it in as many cases as possible.
 * Since function entry and return are usually the most interesting probe
 * sites, we attempt to tune the performance of the fasttrap provider around
 * instructions typically in those places.
 *
 * Looking at a bunch of functions in libraries and executables reveals that
 * most functions begin with either a save or a sethi (to set up a larger
 * argument to the save) and end with a restore or an or (in the case of leaf
 * functions). To try to improve performance, we emulate all of these
 * instructions in the kernel.
 *
 * The save and restore instructions are a little tricky since they perform
 * register window manipulation. Rather than trying to tinker with the
 * register windows from the kernel, we emulate the implicit add that takes
 * place as part of those instructions and set the %pc to point to a simple
 * save or restore we've hidden in the ulwp_t structure. If we're in a return
 * probe and want to make it seem as though the tracepoint has been completely
 * executed, we need to remember that we've pulled this trick with restore and
 * pull registers from the previous window (the one that we'll switch to once
 * the simple restore instruction is executed) rather than the current one.
 * This is why in the case of emulating a restore we set the DTrace CPU flag
 * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
 * (see fasttrap_return_common()).
 */
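
/*
 * A quick reference for the field-extraction macros below (per the SPARC
 * V9 instruction format diagrams): op occupies bits 31:30 of every
 * instruction. In format 2 (branches and sethi), op2 lives in bits 24:22,
 * the branch condition in bits 28:25 (bits 27:25 for the register
 * conditions of BPr), and the annul bit in bit 29; BPr splits its 16-bit
 * displacement into d16hi (bits 21:20) and d16lo (bits 13:0), which
 * DISP16() reassembles. In format 3 (arithmetic and memory operations),
 * rd is in bits 29:25, op3 in bits 24:19, rs1 in bits 18:14, the i bit in
 * bit 13, and either rs2 in bits 4:0 or a signed 13-bit immediate in
 * bits 12:0.
 */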

#define	OP(x)		((x) >> 30)
#define	OP2(x)		(((x) >> 22) & 0x07)
#define	OP3(x)		(((x) >> 19) & 0x3f)
#define	RCOND(x)	(((x) >> 25) & 0x07)
#define	COND(x)		(((x) >> 25) & 0x0f)
#define	A(x)		(((x) >> 29) & 0x01)
#define	I(x)		(((x) >> 13) & 0x01)
#define	RD(x)		(((x) >> 25) & 0x1f)
#define	RS1(x)		(((x) >> 14) & 0x1f)
#define	RS2(x)		(((x) >> 0) & 0x1f)
#define	CC(x)		(((x) >> 20) & 0x03)
#define	DISP16(x)	((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
#define	DISP22(x)	((x) & 0x3fffff)
#define	DISP19(x)	((x) & 0x7ffff)
#define	DISP30(x)	((x) & 0x3fffffff)
#define	SW_TRAP(x)	((x) & 0x7f)

#define	OP3_OR		0x02
#define	OP3_RD		0x28
#define	OP3_JMPL	0x38
#define	OP3_RETURN	0x39
#define	OP3_TCC		0x3a
#define	OP3_SAVE	0x3c
#define	OP3_RESTORE	0x3d

#define	OP3_PREFETCH	0x2d
#define	OP3_CASA	0x3c
#define	OP3_PREFETCHA	0x3d
#define	OP3_CASXA	0x3e

#define	OP2_ILLTRAP	0x0
#define	OP2_BPcc	0x1
#define	OP2_Bicc	0x2
#define	OP2_BPr		0x3
#define	OP2_SETHI	0x4
#define	OP2_FBPfcc	0x5
#define	OP2_FBfcc	0x6

#define	R_G0		0
#define	R_O0		8
#define	R_SP		14
#define	R_I0		24
#define	R_I1		25
#define	R_I2		26
#define	R_I3		27

/*
 * Check the comment in fasttrap.h when changing these offsets or adding
 * new instructions.
 */
#define	FASTTRAP_OFF_SAVE	64
#define	FASTTRAP_OFF_RESTORE	68
#define	FASTTRAP_OFF_FTRET	72
#define	FASTTRAP_OFF_RETURN	76

#define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */
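
/*
 * A sketch of the per-thread scratch space this code expects at %g7 (the
 * ulwp_t), as implied by the offsets above and the comments in
 * fasttrap_pid_probe(): offset 0 receives the copied-out instruction (see
 * FASTTRAP_T_COMMON), FASTTRAP_OFF_SAVE holds "save %g1, %g0, %sp",
 * FASTTRAP_OFF_RESTORE holds "restore %g0, %g0, %g0", FASTTRAP_OFF_FTRET
 * holds the trap that reenters the kernel for return probes, and
 * FASTTRAP_OFF_RETURN holds "return %o0". The authoritative layout lives
 * in fasttrap.h, per the comment above.
 */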

/*
 * Tunable to let users turn off the fancy save instruction optimization.
 * If a program is non-ABI compliant, there's a possibility that the save
 * instruction optimization could cause an error.
 */
int fasttrap_optimize_save = 1;

static uint64_t
fasttrap_anarg(struct regs *rp, int argno)
{
	uint64_t value;

	if (argno < 6)
		return ((&rp->r_o0)[argno]);

	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fulword(&fr->fr_argd[argno]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
		    CPU_DTRACE_BADALIGN);
	} else {
		struct frame32 *fr = (struct frame32 *)rp->r_sp;

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fuword32(&fr->fr_argd[argno]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
		    CPU_DTRACE_BADALIGN);
	}

	return (value);
}

static ulong_t fasttrap_getreg(struct regs *, uint_t);
static void fasttrap_putreg(struct regs *, uint_t, ulong_t);

static void
fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp, int argc,
    uintptr_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);

	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		uintptr_t v;

		for (i = 0; i < cap; i++) {
			x = probe->ftp_argmap[i];

			if (x < 6)
				argv[i] = (&rp->r_o0)[x];
			else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
				argv[i] = 0;
			else
				argv[i] = v;
		}

	} else {
		struct frame32 *fr = (struct frame32 *)rp->r_sp;
		uint32_t v;

		for (i = 0; i < cap; i++) {
			x = probe->ftp_argmap[i];

			if (x < 6)
				argv[i] = (&rp->r_o0)[x];
			else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
				argv[i] = 0;
			else
				argv[i] = v;
		}
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}

static void
fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
    uint_t fake_restore)
{
	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
	kmutex_t *pid_mtx;
	dtrace_icookie_t cookie;

	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    !tp->ftt_proc->ftpc_defunct)
			break;
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
	 */
	if (tp == NULL || tp->ftt_retids == NULL) {
		mutex_exit(pid_mtx);
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;

		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
			if (probe->ftp_argmap == NULL) {
				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
				    rp->r_o2, rp->r_o3, rp->r_o4);
			} else {
				uintptr_t t[5];

				fasttrap_usdt_args(probe, rp,
				    sizeof (t) / sizeof (t[0]), t);

				dtrace_probe(probe->ftp_id, t[0], t[1],
				    t[2], t[3], t[4]);
			}
			continue;
		}

		/*
		 * If this is only a possible return point, we must
		 * be looking at a potential tail call in leaf context.
		 * If the %npc is still within this function, then we
		 * must have misidentified a jmpl as a tail-call when it
		 * is, in fact, part of a jump table. It would be nice to
		 * remove this tracepoint, but this is neither the time
		 * nor the place.
		 */
		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * It's possible for a function to branch to the delay slot
		 * of an instruction that we've identified as a return site.
		 * We can detect this spurious return probe activation by
		 * observing that in this case %npc will be %pc + 4 and %npc
		 * will be inside the current function (unless the user is
		 * doing _crazy_ instruction picking in which case there's
		 * very little we can do). The second check is important
		 * in case the last instructions of a function make a tail-
		 * call to the function located immediately subsequent.
		 */
		if (rp->r_npc == rp->r_pc + 4 &&
		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		/*
		 * The first argument is the offset of the return tracepoint
		 * in the function; the remaining arguments are the return
		 * values.
		 *
		 * If fake_restore is set, we need to pull the return values
		 * out of the %i's rather than the %o's -- a little trickier.
		 */
		if (!fake_restore) {
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
		} else {
			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);

			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
			    arg0, arg1, arg2, arg3);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
			dtrace_interrupt_enable(cookie);
		}
	}

	mutex_exit(pid_mtx);
}

int
fasttrap_pid_probe(struct regs *rp)
{
	proc_t *p = curproc;
	fasttrap_tracepoint_t *tp, tp_local;
	fasttrap_id_t *id;
	pid_t pid;
	uintptr_t pc = rp->r_pc;
	uintptr_t npc = rp->r_npc;
	uintptr_t orig_pc = pc;
	fasttrap_bucket_t *bucket;
	kmutex_t *pid_mtx;
	uint_t fake_restore = 0, is_enabled = 0;
	dtrace_icookie_t cookie;

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (curthread->t_dtrace_step) {
		ASSERT(curthread->t_dtrace_on);
		fasttrap_sigtrap(p, curthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	curthread->t_dtrace_ft = 0;
	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	pid = p->p_pid;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Look up the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    !tp->ftt_proc->ftpc_defunct)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
		mutex_exit(pid_mtx);
		return (-1);
	}

	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;
		int isentry = (id->fti_ptype == DTFTP_ENTRY);

		if (id->fti_ptype == DTFTP_IS_ENABLED) {
			is_enabled = 1;
			continue;
		}

		/*
		 * We note that this was an entry probe to help ustack() find
		 * the first caller.
		 */
		if (isentry) {
			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
		}
		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
		    rp->r_o3, rp->r_o4);
		if (isentry) {
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
			dtrace_interrupt_enable(cookie);
		}
	}

	/*
	 * We're about to do a bunch of work so we cache a local copy of
	 * the tracepoint to emulate the instruction, and then find the
	 * tracepoint again later if we need to light up any return probes.
	 */
	tp_local = *tp;
	mutex_exit(pid_mtx);
	tp = &tp_local;

	/*
	 * If there's an is-enabled probe connected to this tracepoint it
	 * means that there was a 'mov %g0, %o0' instruction that was placed
	 * there by DTrace when the binary was linked. As this probe is, in
	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
	 * bypass all the instruction emulation logic since we know the
	 * inevitable result. It's possible that a user could construct a
	 * scenario where the 'is-enabled' probe was on some other
	 * instruction, but that would be a rather exotic way to shoot oneself
	 * in the foot.
	 */
	if (is_enabled) {
		rp->r_o0 = 1;
		pc = rp->r_npc;
		npc = pc + 4;
		goto done;
	}

	/*
	 * We emulate certain types of instructions to ensure correctness
	 * (in the case of position-dependent instructions) or to optimize
	 * common cases. The rest we have the thread execute back in user-
	 * land.
	 */
	switch (tp->ftt_type) {
	case FASTTRAP_T_SAVE:
	{
		int32_t imm;

		/*
		 * This is an optimization to let us handle function entry
		 * probes more efficiently. Many functions begin with a save
		 * instruction that follows the pattern:
		 *	save	%sp, <imm>, %sp
		 *
		 * Meanwhile, we've stashed the instruction:
		 *	save	%g1, %g0, %sp
		 *
		 * off of %g7, so all we have to do is stick the right value
		 * into %g1 and reset %pc to point to the instruction we've
		 * cleverly hidden (%npc should not be touched).
		 */

		/*
		 * Sign-extend the 13-bit immediate: shifting left by 19
		 * puts bit 12 in the sign bit, and the arithmetic right
		 * shift drags it back down.
		 */
		imm = tp->ftt_instr << 19;
		imm >>= 19;
		rp->r_g1 = rp->r_sp + imm;
		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
		break;
	}
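
	/*
	 * To make the save optimization concrete, consider the hypothetical
	 * entry instruction "save %sp, -0x140, %sp": the sign-extended
	 * immediate is -0x140, so the case above sets %g1 = %sp - 0x140;
	 * the hidden "save %g1, %g0, %sp" then both shifts the register
	 * window and leaves the new %sp equal to %g1, exactly as the
	 * original save would have. (%g registers are global across
	 * windows, which is what makes %g1 a safe conduit.)
	 */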

	case FASTTRAP_T_RESTORE:
	{
		ulong_t value;
		uint_t rd;

		/*
		 * This is an optimization to let us handle function
		 * return probes more efficiently. Most non-leaf functions
		 * end with the sequence:
		 *	ret
		 *	restore	<reg>, <reg_or_imm>, %oX
		 *
		 * We've stashed the instruction:
		 *	restore	%g0, %g0, %g0
		 *
		 * off of %g7 so we just need to place the correct value
		 * in the right %i register (since after our fake-o
		 * restore, the %i's will become the %o's) and set the %pc
		 * to point to our hidden restore. We also set fake_restore to
		 * let fasttrap_return_common() know that it will find the
		 * return values in the %i's rather than the %o's.
		 */

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		/*
		 * Convert %o's to %i's; leave %g's as they are.
		 */
		rd = RD(tp->ftt_instr);
		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);

		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_RETURN:
	{
		uintptr_t target;

		/*
		 * A return instruction is like a jmpl (without the link
		 * part) that executes an implicit restore. We've stashed
		 * the instruction:
		 *	return	%o0
		 *
		 * off of %g7 so we just need to place the target in %o0
		 * and set the %pc to point to the stashed return instruction.
		 * We use %o0 since that register disappears after the return
		 * executes, erasing any evidence of this tampering.
		 */
		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, R_O0, target);

		pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_OR:
	{
		ulong_t value;

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, RD(tp->ftt_instr), value);
		pc = rp->r_npc;
		npc = pc + 4;
		break;
	}

	case FASTTRAP_T_SETHI:
		if (RD(tp->ftt_instr) != R_G0) {
			uint32_t imm32 = tp->ftt_instr << 10;
			fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
		}
		pc = rp->r_npc;
		npc = pc + 4;
		break;
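
	/*
	 * The conditional-branch cases below only decide where %pc and %npc
	 * go; the condition codes were already set by the user's earlier
	 * instructions. As a worked example, for a hypothetical
	 * "bne,a <dest>", taken is computed as ~z below; if the zero bit
	 * was clear we set %pc = %npc and %npc = dest, so the delay slot
	 * executes before the target, and if it was set, the annul bit
	 * sends %pc to %npc + 4, skipping the delay slot entirely.
	 */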

	case FASTTRAP_T_CCR:
	{
		uint_t c, v, z, n, taken;
		uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;

		if (tp->ftt_cc != 0)
			ccr >>= 4;

		c = (ccr >> 0) & 1;
		v = (ccr >> 1) & 1;
		z = (ccr >> 2) & 1;
		n = (ccr >> 3) & 1;

		switch (tp->ftt_code) {
		case 0x0:	/* BN */
			taken = 0; break;
		case 0x1:	/* BE */
			taken = z; break;
		case 0x2:	/* BLE */
			taken = z | (n ^ v); break;
		case 0x3:	/* BL */
			taken = n ^ v; break;
		case 0x4:	/* BLEU */
			taken = c | z; break;
		case 0x5:	/* BCS (BLU) */
			taken = c; break;
		case 0x6:	/* BNEG */
			taken = n; break;
		case 0x7:	/* BVS */
			taken = v; break;
		case 0x8:	/* BA */
			/*
			 * We handle the BA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = 1; break;
		case 0x9:	/* BNE */
			taken = ~z; break;
		case 0xa:	/* BG */
			taken = ~(z | (n ^ v)); break;
		case 0xb:	/* BGE */
			taken = ~(n ^ v); break;
		case 0xc:	/* BGU */
			taken = ~(c | z); break;
		case 0xd:	/* BCC (BGEU) */
			taken = ~c; break;
		case 0xe:	/* BPOS */
			taken = ~n; break;
		case 0xf:	/* BVC */
			taken = ~v; break;
		}

		if (taken & 1) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_FCC:
	{
		uint_t fcc;
		uint_t taken;
		uint64_t fsr;

		dtrace_getfsr(&fsr);

		if (tp->ftt_cc == 0) {
			fcc = (fsr >> 10) & 0x3;
		} else {
			uint_t shift;
			ASSERT(tp->ftt_cc <= 3);
			shift = 30 + tp->ftt_cc * 2;
			fcc = (fsr >> shift) & 0x3;
		}

		switch (tp->ftt_code) {
		case 0x0:	/* FBN */
			taken = (1 << fcc) & (0|0|0|0); break;
		case 0x1:	/* FBNE */
			taken = (1 << fcc) & (8|4|2|0); break;
		case 0x2:	/* FBLG */
			taken = (1 << fcc) & (0|4|2|0); break;
		case 0x3:	/* FBUL */
			taken = (1 << fcc) & (8|0|2|0); break;
		case 0x4:	/* FBL */
			taken = (1 << fcc) & (0|0|2|0); break;
		case 0x5:	/* FBUG */
			taken = (1 << fcc) & (8|4|0|0); break;
		case 0x6:	/* FBG */
			taken = (1 << fcc) & (0|4|0|0); break;
		case 0x7:	/* FBU */
			taken = (1 << fcc) & (8|0|0|0); break;
		case 0x8:	/* FBA */
			/*
			 * We handle the FBA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = (1 << fcc) & (8|4|2|1); break;
		case 0x9:	/* FBE */
			taken = (1 << fcc) & (0|0|0|1); break;
		case 0xa:	/* FBUE */
			taken = (1 << fcc) & (8|0|0|1); break;
		case 0xb:	/* FBGE */
			taken = (1 << fcc) & (0|4|0|1); break;
		case 0xc:	/* FBUGE */
			taken = (1 << fcc) & (8|4|0|1); break;
		case 0xd:	/* FBLE */
			taken = (1 << fcc) & (0|0|2|1); break;
		case 0xe:	/* FBULE */
			taken = (1 << fcc) & (8|0|2|1); break;
		case 0xf:	/* FBO */
			taken = (1 << fcc) & (0|4|2|1); break;
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}
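
	/*
	 * A note on the truth-table masks above: the two fcc bits encode
	 * equal (0), less (1), greater (2), and unordered (3), so each
	 * "(8|4|2|1)"-style literal spells out, one bit per fcc value, the
	 * conditions under which the branch is taken. For FBL, for example,
	 * the mask (0|0|2|0) has only bit 1 set, so "(1 << fcc) & mask" is
	 * nonzero exactly when the comparison said "less".
	 */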

	case FASTTRAP_T_REG:
	{
		int64_t value;
		uint_t taken;
		uint_t reg = RS1(tp->ftt_instr);

		/*
		 * An ILP32 process shouldn't be using a branch predicated on
		 * an %i or an %l since it would violate the ABI. It's a
		 * violation of the ABI because we can't ensure deterministic
		 * behavior. We should have identified this case when we
		 * enabled the probe.
		 */
		ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);

		/*
		 * These branches are predicated on a signed comparison,
		 * hence the int64_t.
		 */
		value = (int64_t)fasttrap_getreg(rp, reg);

		switch (tp->ftt_code) {
		case 0x1:	/* BRZ */
			taken = (value == 0); break;
		case 0x2:	/* BRLEZ */
			taken = (value <= 0); break;
		case 0x3:	/* BRLZ */
			taken = (value < 0); break;
		case 0x5:	/* BRNZ */
			taken = (value != 0); break;
		case 0x6:	/* BRGZ */
			taken = (value > 0); break;
		case 0x7:	/* BRGEZ */
			taken = (value >= 0); break;
		default:
		case 0x0:
		case 0x4:
			panic("fasttrap: mishandled a branch");
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_ALWAYS:
		/*
		 * BAs, BA,As...
		 */

		if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Annulled branch always instructions never execute
			 * the instruction in the delay slot.
			 */
			pc = tp->ftt_dest;
			npc = tp->ftt_dest + 4;
		} else {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		}
		break;

	case FASTTRAP_T_RDPC:
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CALL:
		/*
		 * It's a call _and_ link remember...
		 */
		rp->r_o7 = rp->r_pc;
		pc = rp->r_npc;
		npc = tp->ftt_dest;
		break;

	case FASTTRAP_T_JMPL:
		pc = rp->r_npc;

		if (I(tp->ftt_instr)) {
			uint_t rs1 = RS1(tp->ftt_instr);
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			npc = fasttrap_getreg(rp, rs1) + imm;
		} else {
			uint_t rs1 = RS1(tp->ftt_instr);
			uint_t rs2 = RS2(tp->ftt_instr);

			npc = fasttrap_getreg(rp, rs1) +
			    fasttrap_getreg(rp, rs2);
		}

		/*
		 * Do the link part of the jump-and-link instruction.
		 */
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);

		break;

	case FASTTRAP_T_COMMON:
	{
		curthread->t_dtrace_scrpc = rp->r_g7;
		curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;

		/*
		 * Copy the instruction to a reserved location in the
		 * user-land thread structure, then set the PC to that
		 * location and leave the NPC alone. We take pains to ensure
		 * consistency in the instruction stream (See SPARC
		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
		 * instruction into the user's address space without
		 * bypassing the I$. There's no AS_USER version of this ASI
		 * (as exists for other ASIs) so we use the lofault
		 * mechanism to catch faults.
		 */
		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
			/*
			 * If the copyout fails, then the process's state
			 * is not consistent (the effects of the traced
			 * instruction will never be seen). This process
			 * cannot be allowed to continue execution.
			 */
			fasttrap_sigtrap(curproc, curthread, pc);
			return (0);
		}

		curthread->t_dtrace_pc = pc;
		curthread->t_dtrace_npc = npc;
		curthread->t_dtrace_on = 1;

		pc = curthread->t_dtrace_scrpc;

		if (tp->ftt_retids != NULL) {
			curthread->t_dtrace_step = 1;
			curthread->t_dtrace_ret = 1;
			npc = curthread->t_dtrace_astpc;
		}
		break;
	}

	default:
		panic("fasttrap: mishandled an instruction");
	}

	/*
	 * This bit me in the ass a couple of times, so let's toss this
	 * in as a cursory sanity check.
	 */
	ASSERT(pc != rp->r_g7 + 4);
	ASSERT(pc != rp->r_g7 + 8);

done:
	/*
	 * If there were no return probes when we first found the tracepoint,
	 * we should feel no obligation to honor any return probes that were
	 * subsequently enabled -- they'll just have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until we return to the kernel.
		 */
		if (tp->ftt_type != FASTTRAP_T_COMMON) {
			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
		} else {
			ASSERT(curthread->t_dtrace_ret != 0);
			ASSERT(curthread->t_dtrace_pc == orig_pc);
			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
			ASSERT(npc == curthread->t_dtrace_astpc);
		}
	}

	ASSERT(pc != 0);
	rp->r_pc = pc;
	rp->r_npc = npc;

	return (0);
}

int
fasttrap_return_probe(struct regs *rp)
{
	proc_t *p = ttoproc(curthread);
	pid_t pid;
	uintptr_t pc = curthread->t_dtrace_pc;
	uintptr_t npc = curthread->t_dtrace_npc;

	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	/*
	 * We set the %pc and %npc to their values when the traced
	 * instruction was initially executed so that it appears to
	 * dtrace_probe() that we're on the original instruction, and so that
	 * the user can't easily detect our complex web of lies.
	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
	 * after we return.
	 */
	rp->r_pc = pc;
	rp->r_npc = npc;

	pid = p->p_pid;
	fasttrap_return_common(rp, pc, pid, 0);

	return (0);
}

int
fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr = FASTTRAP_INSTR;

	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}
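
/*
 * A sketch of the install/remove protocol implemented by the functions
 * here: fasttrap_tracepoint_init() reads and validates the original
 * instruction into tp->ftt_instr, fasttrap_tracepoint_install() overwrites
 * that address with the trap instruction FASTTRAP_INSTR, and
 * fasttrap_tracepoint_remove() writes the original back only if the word
 * currently there is still our trap or a debugger's breakpoint -- anything
 * else means someone else has rewritten the text and we should leave it
 * alone.
 */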

int
fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
{
	fasttrap_instr_t instr;

	/*
	 * Distinguish between read or write failures and a changed
	 * instruction.
	 */
	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
		return (0);
	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
		return (0);
	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
		return (-1);

	return (0);
}

int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
    fasttrap_probe_type_t type)
{
	uint32_t instr;
	int32_t disp;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set.
	 */
	if (uread(p, &instr, 4, pc) != 0)
		return (-1);

	/*
	 * Decode the instruction to fill in the probe flags. We can have
	 * the process execute most instructions on its own using a pc/npc
	 * trick, but pc-relative control transfers present a problem since
	 * we're relocating the instruction. We emulate these instructions
	 * in the kernel. We assume a default type and overwrite that as
	 * needed.
	 *
	 * pc-relative instructions must be emulated for correctness;
	 * other instructions (which represent a large set of commonly traced
	 * instructions) are emulated or otherwise optimized for performance.
	 */
	tp->ftt_type = FASTTRAP_T_COMMON;
	if (OP(instr) == 1) {
		/*
		 * Call instructions.
		 */
		tp->ftt_type = FASTTRAP_T_CALL;
		disp = DISP30(instr) << 2;
		tp->ftt_dest = pc + (intptr_t)disp;

	} else if (OP(instr) == 0) {
		/*
		 * Branch instructions.
		 *
		 * Unconditional branches need careful attention when they're
		 * annulled: annulled unconditional branches never execute
		 * the instruction in the delay slot.
		 */
		switch (OP2(instr)) {
		case OP2_ILLTRAP:
		case 0x7:
			/*
			 * The compiler may place an illtrap after a call to
			 * a function that returns a structure. In the case of
			 * a returned structure, the compiler places an illtrap
			 * whose const22 field is the size of the returned
			 * structure immediately following the delay slot of
			 * the call. To stay out of the way, we refuse to
			 * place tracepoints on top of illtrap instructions.
			 *
			 * This is one of the dumbest architectural decisions
			 * I've ever had to work around.
			 *
			 * We also identify the only illegal op2 value (See
			 * SPARC Architecture Manual Version 9, E.2 table 31).
			 */
			return (-1);

		case OP2_BPcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				/*
				 * Check for an illegal instruction.
				 */
				if (CC(instr) & 1)
					return (-1);
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_Bicc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_CCR;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;
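
		/*
		 * A note on the displacement arithmetic above: a 22-bit
		 * word displacement occupies bits 21:0, so shifting left
		 * by 10 parks its sign bit in bit 31, and the arithmetic
		 * right shift by 8 simultaneously sign-extends it and
		 * multiplies it by 4 (the instruction size). For example,
		 * a disp22 of 0x3ffffe (-2) yields -8 bytes, a branch two
		 * instructions back. The 19- and 16-bit displacements get
		 * the same treatment via the shift pairs (13, 11) and
		 * (16, 14).
		 */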

		case OP2_BPr:
			/*
			 * Check for an illegal instruction.
			 */
			if ((RCOND(instr) & 3) == 0)
				return (-1);

			/*
			 * It's a violation of the v8plus ABI to use a
			 * register-predicated branch in a 32-bit app if
			 * the register used is an %l or an %i (%gs and %os
			 * are legit because they're not saved to the stack
			 * in 32-bit words when we take a trap).
			 */
			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
				return (-1);

			tp->ftt_type = FASTTRAP_T_REG;
			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;
			disp = DISP16(instr);
			disp <<= 16;
			disp >>= 14;
			tp->ftt_dest = pc + (intptr_t)disp;
			tp->ftt_code = RCOND(instr);
			break;

		case OP2_SETHI:
			tp->ftt_type = FASTTRAP_T_SETHI;
			break;

		case OP2_FBPfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = CC(instr);
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP19(instr);
			disp <<= 13;
			disp >>= 11;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;

		case OP2_FBfcc:
			if (COND(instr) == 8) {
				tp->ftt_type = FASTTRAP_T_ALWAYS;
			} else {
				tp->ftt_type = FASTTRAP_T_FCC;
				tp->ftt_cc = 0;
				tp->ftt_code = COND(instr);
			}

			if (A(instr) != 0)
				tp->ftt_flags |= FASTTRAP_F_ANNUL;

			disp = DISP22(instr);
			disp <<= 10;
			disp >>= 8;
			tp->ftt_dest = pc + (intptr_t)disp;
			break;
		}

	} else if (OP(instr) == 2) {
		switch (OP3(instr)) {
		case OP3_RETURN:
			tp->ftt_type = FASTTRAP_T_RETURN;
			break;

		case OP3_JMPL:
			tp->ftt_type = FASTTRAP_T_JMPL;
			break;

		case OP3_RD:
			/*
			 * An rs1 value of 5 selects %pc -- the rdpc
			 * instruction -- the only position-dependent
			 * member of this family.
			 */
			if (RS1(instr) == 5)
				tp->ftt_type = FASTTRAP_T_RDPC;
			break;

		case OP3_SAVE:
			/*
			 * We optimize for save instructions at function
			 * entry; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_SAVE) for details.
			 */
			if (fasttrap_optimize_save != 0 &&
			    type == DTFTP_ENTRY &&
			    I(instr) == 1 && RD(instr) == R_SP)
				tp->ftt_type = FASTTRAP_T_SAVE;
			break;

		case OP3_RESTORE:
			/*
			 * We optimize restore instructions at function
			 * return; see the comment in fasttrap_pid_probe()
			 * (near FASTTRAP_T_RESTORE) for details.
			 *
			 * rd must be an %o or %g register.
			 */
			if ((RD(instr) & 0x10) == 0)
				tp->ftt_type = FASTTRAP_T_RESTORE;
			break;

		case OP3_OR:
			/*
			 * A large proportion of instructions in the delay
			 * slot of retl instructions are or's so we emulate
			 * these downstairs as an optimization.
			 */
			tp->ftt_type = FASTTRAP_T_OR;
			break;

		case OP3_TCC:
			/*
			 * Breakpoint instructions are effectively position-
			 * dependent since the debugger uses the %pc value
			 * to look up which breakpoint was executed. As a
			 * result, we can't actually instrument breakpoints.
			 */
			if (SW_TRAP(instr) == ST_BREAKPOINT)
				return (-1);
			break;

		case 0x19:
		case 0x1d:
		case 0x29:
		case 0x33:
		case 0x3f:
			/*
			 * Identify illegal instructions (See SPARC
			 * Architecture Manual Version 9, E.2 table 32).
			 */
			return (-1);
		}
	} else if (OP(instr) == 3) {
		uint32_t op3 = OP3(instr);

		/*
		 * Identify illegal instructions (See SPARC Architecture
		 * Manual Version 9, E.2 table 33).
		 */
		if ((op3 & 0x28) == 0x28) {
			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
				return (-1);
		} else {
			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
				return (-1);
		}
	}

	tp->ftt_instr = instr;

	/*
	 * We don't know how this tracepoint is going to be used, but in case
	 * it's used as part of a function return probe, we need to indicate
	 * whether it's always a return site or only potentially a return
	 * site. If it's part of a return probe, it's always going to be a
	 * return from that function if it's a restore instruction or if
	 * the previous instruction was a return. If we could reliably
	 * distinguish jump tables from return sites, this wouldn't be
	 * necessary.
	 */
	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;

	return (0);
}

/*ARGSUSED*/
uint64_t
fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
}

/*ARGSUSED*/
uint64_t
fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
}

static uint64_t fasttrap_getreg_fast_cnt;
static uint64_t fasttrap_getreg_mpcb_cnt;
static uint64_t fasttrap_getreg_slow_cnt;

static ulong_t
fasttrap_getreg(struct regs *rp, uint_t reg)
{
	ulong_t value;
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern ulong_t dtrace_getreg_win(uint_t, uint_t);

	/*
	 * We have the %os and %gs in our struct regs, but if we need to
	 * snag a %l or %i we need to go scrounging around in the process's
	 * address space. Registers are numbered %g0-%g7 as 0-7, %o0-%o7 as
	 * 8-15, %l0-%l7 as 16-23, and %i0-%i7 as 24-31, which is why %g1
	 * through %o7 live at (&rp->r_g1)[0] through (&rp->r_g1)[14].
	 */
	if (reg == 0)
		return (0);

	if (reg < 16)
		return ((&rp->r_g1)[reg - 1]);

	/*
	 * Before we look at the user's stack, we'll check the register
	 * windows to see if the information we want is in there.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		value = dtrace_getreg_win(reg, 1);
		dtrace_interrupt_enable(cookie);

		atomic_add_64(&fasttrap_getreg_fast_cnt, 1);

		return (value);
	}
	dtrace_interrupt_enable(cookie);
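
	/*
	 * A note on the indexing used below: %l0-%l7 and %i0-%i7 are
	 * numbered 16-31, and a register window saved to memory is laid
	 * out as eight locals immediately followed by eight ins, so
	 * rw_local[reg - 16] lands on the right slot even for an %i
	 * (e.g. %i0, register 24, is rw_local[8], which is rw_in[0]).
	 * The fr_local indexing into the stack frame below relies on the
	 * same contiguity.
	 */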

	/*
	 * First check the machpcb structure to see if we've already read
	 * in the register window we're looking for; if we haven't (and
	 * we probably haven't), try to copy in the value of the register.
	 */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
				return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
			goto err;
	} else {
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		uint32_t *v32 = (uint32_t *)&value;

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
				return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		/*
		 * SPARC is big-endian, so v32[1] is the low half of value.
		 */
		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
			goto err;

		v32[0] = 0;
	}

	atomic_add_64(&fasttrap_getreg_slow_cnt, 1);
	return (value);

err:
	/*
	 * If the copy in failed, the process will be in an irrecoverable
	 * state, and we have no choice but to kill it.
	 */
	psignal(ttoproc(curthread), SIGILL);
	return (0);
}

static uint64_t fasttrap_putreg_fast_cnt;
static uint64_t fasttrap_putreg_mpcb_cnt;
static uint64_t fasttrap_putreg_slow_cnt;

static void
fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
{
	dtrace_icookie_t cookie;
	struct machpcb *mpcb;
	extern void dtrace_putreg_win(uint_t, ulong_t);

	if (reg == 0)
		return;

	if (reg < 16) {
		(&rp->r_g1)[reg - 1] = value;
		return;
	}

	/*
	 * If the user process is still using some register windows, we
	 * can just place the value in the correct window.
	 */
	cookie = dtrace_interrupt_disable();
	if (dtrace_getotherwin() > 0) {
		dtrace_putreg_win(reg, value);
		dtrace_interrupt_enable(cookie);
		atomic_add_64(&fasttrap_putreg_fast_cnt, 1);
		return;
	}
	dtrace_interrupt_enable(cookie);

	/*
	 * First see if there's a copy of the register window in the
	 * machpcb structure that we can modify; if there isn't, try to
	 * copy out the value. If that fails, we try to create a new
	 * register window in the machpcb structure. While this isn't
	 * _precisely_ the intended use of the machpcb structure, it
	 * can't cause any problems since we know at this point in the
	 * code that all of the user's data have been flushed out of the
	 * register file (since %otherwin is 0).
	 */
	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;

		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = value;
				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
				return;
			} while (i > 0);
		}

		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
			return;
		}
	} else {
		struct frame32 *fr =
		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
		uint32_t v32 = (uint32_t)value;

		if (mpcb->mpcb_wbcnt > 0) {
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
					continue;

				rwin[i].rw_local[reg - 16] = v32;
				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
				return;
			} while (i > 0);
		}

		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
				goto err;

			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
			mpcb->mpcb_wbcnt++;
			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
			return;
		}
	}

	atomic_add_64(&fasttrap_putreg_slow_cnt, 1);
	return;

err:
	/*
	 * If we couldn't record this register's value, the process is in an
	 * irrecoverable state and we have no choice but to euthanize it.
	 */
	psignal(ttoproc(curthread), SIGILL);
}