xref: /titanic_50/usr/src/uts/sparc/dtrace/fasttrap_isa.c (revision 1ed6b69a5ca1ca3ee5e9a4931f74e2237c7e1c9f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/fasttrap_isa.h>
28 #include <sys/fasttrap_impl.h>
29 #include <sys/dtrace.h>
30 #include <sys/dtrace_impl.h>
31 #include <sys/cmn_err.h>
32 #include <sys/frame.h>
33 #include <sys/stack.h>
34 #include <sys/sysmacros.h>
35 #include <sys/trap.h>
36 
37 #include <v9/sys/machpcb.h>
38 #include <v9/sys/privregs.h>
39 
40 /*
41  * Lossless User-Land Tracing on SPARC
42  * -----------------------------------
43  *
44  * The Basic Idea
45  *
46  * The most important design constraint is, of course, correct execution of
47  * the user thread above all else. The next most important goal is rapid
48  * execution. We combine execution of instructions in user-land with
49  * emulation of certain instructions in the kernel to aim for complete
50  * correctness and maximal performance.
51  *
52  * We take advantage of the split PC/NPC architecture to speed up logical
53  * single-stepping; when we copy an instruction out to the scratch space in
54  * the ulwp_t structure (held in the %g7 register on SPARC), we can
55  * effectively single step by setting the PC to our scratch space and leaving
56  * the NPC alone. This executes the replaced instruction and then continues
57  * on without having to reenter the kernel as with single- stepping. The
58  * obvious caveat is for instructions whose execution is PC dependant --
59  * branches, call and link instructions (call and jmpl), and the rdpc
60  * instruction. These instructions cannot be executed in the manner described
61  * so they must be emulated in the kernel.
62  *
63  * Emulation for this small set of instructions is fairly simple; the most
64  * difficult part being emulating branch conditions.
65  *
66  *
67  * A Cache Heavy Portfolio
68  *
69  * It's important to note at this time that copying an instruction out to the
70  * ulwp_t scratch space in user-land is rather complicated. SPARC has
71  * separate data and instruction caches so any writes to the D$ (using a
72  * store instruction for example) aren't necessarily reflected in the I$.
73  * The flush instruction can be used to synchronize the two and must be used
74  * for any self-modifying code, but the flush instruction only applies to the
75  * primary address space (the absence of a flusha analogue to the flush
76  * instruction that accepts an ASI argument is an obvious omission from SPARC
77  * v9 where the notion of the alternate address space was introduced on
78  * SPARC). To correctly copy out the instruction we must use a block store
79  * that doesn't allocate in the D$ and ensures synchronization with the I$;
80  * see dtrace_blksuword32() for the implementation  (this function uses
81  * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
82  * described). Refer to the UltraSPARC I/II manual for details on the
83  * ASI_BLK_COMMIT_S ASI.
84  *
85  *
86  * Return Subtleties
87  *
88  * When we're firing a return probe we need to expose the value returned by
89  * the function being traced. Since the function can set the return value
90  * in its last instruction, we need to fire the return probe only _after_
91  * the effects of the instruction are apparent. For instructions that we
92  * emulate, we can call dtrace_probe() after we've performed the emulation;
93  * for instructions that we execute after we return to user-land, we set
94  * %pc to the instruction we copied out (as described above) and set %npc
95  * to a trap instruction stashed in the ulwp_t structure. After the traced
96  * instruction is executed, the trap instruction returns control to the
97  * kernel where we can fire the return probe.
98  *
99  * This need for a second trap in cases where we execute the traced
100  * instruction makes it all the more important to emulate the most common
101  * instructions to avoid the second trip in and out of the kernel.
102  *
103  *
104  * Making it Fast
105  *
106  * Since copying out an instruction is neither simple nor inexpensive for the
107  * CPU, we should attempt to avoid doing it in as many cases as possible.
108  * Since function entry and return are usually the most interesting probe
109  * sites, we attempt to tune the performance of the fasttrap provider around
110  * instructions typically in those places.
111  *
112  * Looking at a bunch of functions in libraries and executables reveals that
113  * most functions begin with either a save or a sethi (to setup a larger
114  * argument to the save) and end with a restore or an or (in the case of leaf
115  * functions). To try to improve performance, we emulate all of these
116  * instructions in the kernel.
117  *
118  * The save and restore instructions are a little tricky since they perform
119  * register window manipulation. Rather than trying to tinker with the
120  * register windows from the kernel, we emulate the implicit add that takes
121  * place as part of those instructions and set the %pc to point to a simple
122  * save or restore we've hidden in the ulwp_t structure. If we're in a return
123  * probe so want to make it seem as though the tracepoint has been completely
124  * executed we need to remember that we've pulled this trick with restore and
125  * pull registers from the previous window (the one that we'll switch to once
126  * the simple restore instruction is executed) rather than the current one. This
127  * is why in the case of emulating a restore we set the DTrace CPU flag
128  * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
129  * (see fasttrap_return_common()).
130  */
131 
132 #define	OP(x)		((x) >> 30)	/* bits 31:30 of a 32-bit instruction word */
133 #define	OP2(x)		(((x) >> 22) & 0x07)	/* bits 24:22 */
134 #define	OP3(x)		(((x) >> 19) & 0x3f)	/* bits 24:19 */
135 #define	RCOND(x)	(((x) >> 25) & 0x07)	/* bits 27:25 */
136 #define	COND(x)		(((x) >> 25) & 0x0f)	/* bits 28:25 */
137 #define	A(x)		(((x) >> 29) & 0x01)	/* bit 29 */
138 #define	I(x)		(((x) >> 13) & 0x01)	/* bit 13; nonzero selects the immediate operand form */
139 #define	RD(x)		(((x) >> 25) & 0x1f)	/* bits 29:25: destination register number */
140 #define	RS1(x)		(((x) >> 14) & 0x1f)	/* bits 18:14: first source register number */
141 #define	RS2(x)		(((x) >> 0) & 0x1f)	/* bits 4:0: second source register number */
142 #define	CC(x)		(((x) >> 20) & 0x03)	/* bits 21:20 */
143 #define	DISP16(x)	((((x) >> 6) & 0xc000) | ((x) & 0x3fff))	/* bits 21:20 moved down to 15:14, joined with bits 13:0 */
144 #define	DISP22(x)	((x) & 0x3fffff)	/* bits 21:0 */
145 #define	DISP19(x)	((x) & 0x7ffff)	/* bits 18:0 */
146 #define	DISP30(x)	((x) & 0x3fffffff)	/* bits 29:0 */
147 #define	SW_TRAP(x)	((x) & 0x7f)	/* bits 6:0 */
148 
149 #define	OP3_OR		0x02
150 #define	OP3_RD		0x28
151 #define	OP3_JMPL	0x38
152 #define	OP3_RETURN	0x39
153 #define	OP3_TCC		0x3a
154 #define	OP3_SAVE	0x3c
155 #define	OP3_RESTORE	0x3d
156 
157 #define	OP3_PREFETCH	0x2d
158 #define	OP3_CASA	0x3c
159 #define	OP3_PREFETCHA	0x3d
160 #define	OP3_CASXA	0x3e
161 
162 #define	OP2_ILLTRAP	0x0
163 #define	OP2_BPcc	0x1
164 #define	OP2_Bicc	0x2
165 #define	OP2_BPr		0x3
166 #define	OP2_SETHI	0x4
167 #define	OP2_FBPfcc	0x5
168 #define	OP2_FBfcc	0x6
169 
170 #define	R_G0		0	/* register numbers as passed to fasttrap_getreg()/fasttrap_putreg() */
171 #define	R_O0		8
172 #define	R_SP		14
173 #define	R_I0		24	/* R_I0..R_I4 are consecutive: %i0 through %i4 */
174 #define	R_I1		25
175 #define	R_I2		26
176 #define	R_I3		27
177 #define	R_I4		28
178 
179 /*
180  * Check the comment in fasttrap.h when changing these offsets or adding
181  * new instructions.
182  */
183 #define	FASTTRAP_OFF_SAVE	64	/* added to rp->r_g7 to form %pc for the stashed save */
184 #define	FASTTRAP_OFF_RESTORE	68	/* added to rp->r_g7 to form %pc for the stashed restore */
185 #define	FASTTRAP_OFF_FTRET	72	/* added to rp->r_g7 to form t_dtrace_astpc (return-probe trap) */
186 #define	FASTTRAP_OFF_RETURN	76	/* added to rp->r_g7 to form %pc for the stashed return */
187 
188 #define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */
190 /*
191  * Tunable to let users turn off the fancy save instruction optimization.
192  * If a program is non-ABI compliant, there's a possibility that the save
193  * instruction optimization could cause an error.
194  */
195 int fasttrap_optimize_save = 1;	/* defaults to 1, i.e. the optimization is left on */
196 
197 static uint64_t
198 fasttrap_anarg(struct regs *rp, int argno)
199 {
200 	uint64_t value;
201 
202 	if (argno < 6)
203 		return ((&rp->r_o0)[argno]);
204 
205 	if (curproc->p_model == DATAMODEL_NATIVE) {
206 		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
207 
208 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
209 		value = dtrace_fulword(&fr->fr_argd[argno]);
210 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
211 		    CPU_DTRACE_BADALIGN);
212 	} else {
213 		struct frame32 *fr = (struct frame32 *)rp->r_sp;
214 
215 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
216 		value = dtrace_fuword32(&fr->fr_argd[argno]);
217 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
218 		    CPU_DTRACE_BADALIGN);
219 	}
220 
221 	return (value);
222 }
223 
224 static ulong_t fasttrap_getreg(struct regs *, uint_t);
225 static void fasttrap_putreg(struct regs *, uint_t, ulong_t);
226 
227 static void
228 fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
229     uint_t fake_restore, int argc, uintptr_t *argv)
230 {
231 	int i, x, cap = MIN(argc, probe->ftp_nargs);
232 	int inc = (fake_restore ? 16 : 0);
233 
234 	/*
235 	 * The only way we'll hit the fake_restore case is if a USDT probe is
236 	 * invoked as a tail-call. While it wouldn't be incorrect, we can
237 	 * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
238 	 * directly since a tail-call can't be made if the invoked function
239 	 * would use the argument dump space (i.e. if there were more than
240 	 * 6 arguments). We take this shortcut because unconditionally rooting
241 	 * around for R_FP (R_SP + 16) would be unnecessarily painful.
242 	 */
243 
244 	if (curproc->p_model == DATAMODEL_NATIVE) {
245 		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
246 		uintptr_t v;
247 
248 		for (i = 0; i < cap; i++) {
249 			x = probe->ftp_argmap[i];
250 
251 			if (x < 6)
252 				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
253 			else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
254 				argv[i] = 0;
255 		}
256 
257 	} else {
258 		struct frame32 *fr = (struct frame32 *)rp->r_sp;
259 		uint32_t v;
260 
261 		for (i = 0; i < cap; i++) {
262 			x = probe->ftp_argmap[i];
263 
264 			if (x < 6)
265 				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
266 			else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
267 				argv[i] = 0;
268 		}
269 	}
270 
271 	for (; i < argc; i++) {
272 		argv[i] = 0;
273 	}
274 }
275 
276 static void
277 fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
278     uint_t fake_restore)
279 {
280 	fasttrap_tracepoint_t *tp;
281 	fasttrap_bucket_t *bucket;
282 	fasttrap_id_t *id;
283 	kmutex_t *pid_mtx;
284 	dtrace_icookie_t cookie;
285 
286 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
287 	mutex_enter(pid_mtx);
288 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
289 
290 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
291 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
292 		    tp->ftt_proc->ftpc_acount != 0)
293 			break;
294 	}
295 
296 	/*
297 	 * Don't sweat it if we can't find the tracepoint again; unlike
298 	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
299 	 * is not essential to the correct execution of the process.
300 	 */
301 	if (tp == NULL || tp->ftt_retids == NULL) {
302 		mutex_exit(pid_mtx);
303 		return;
304 	}
305 
306 	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
307 		fasttrap_probe_t *probe = id->fti_probe;
308 
309 		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
310 			if (probe->ftp_argmap != NULL && fake_restore) {
311 				uintptr_t t[5];
312 
313 				fasttrap_usdt_args(probe, rp, fake_restore,
314 				    sizeof (t) / sizeof (t[0]), t);
315 
316 				cookie = dtrace_interrupt_disable();
317 				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
318 				dtrace_probe(probe->ftp_id, t[0], t[1],
319 				    t[2], t[3], t[4]);
320 				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
321 				dtrace_interrupt_enable(cookie);
322 
323 			} else if (probe->ftp_argmap != NULL) {
324 				uintptr_t t[5];
325 
326 				fasttrap_usdt_args(probe, rp, fake_restore,
327 				    sizeof (t) / sizeof (t[0]), t);
328 
329 				dtrace_probe(probe->ftp_id, t[0], t[1],
330 				    t[2], t[3], t[4]);
331 
332 			} else if (fake_restore) {
333 				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
334 				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
335 				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
336 				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
337 				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);
338 
339 				cookie = dtrace_interrupt_disable();
340 				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
341 				dtrace_probe(probe->ftp_id, arg0, arg1,
342 				    arg2, arg3, arg4);
343 				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
344 				dtrace_interrupt_enable(cookie);
345 
346 			} else {
347 				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
348 				    rp->r_o2, rp->r_o3, rp->r_o4);
349 			}
350 
351 			continue;
352 		}
353 
354 		/*
355 		 * If this is only a possible return point, we must
356 		 * be looking at a potential tail call in leaf context.
357 		 * If the %npc is still within this function, then we
358 		 * must have misidentified a jmpl as a tail-call when it
359 		 * is, in fact, part of a jump table. It would be nice to
360 		 * remove this tracepoint, but this is neither the time
361 		 * nor the place.
362 		 */
363 		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
364 		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
365 			continue;
366 
367 		/*
368 		 * It's possible for a function to branch to the delay slot
369 		 * of an instruction that we've identified as a return site.
370 		 * We can dectect this spurious return probe activation by
371 		 * observing that in this case %npc will be %pc + 4 and %npc
372 		 * will be inside the current function (unless the user is
373 		 * doing _crazy_ instruction picking in which case there's
374 		 * very little we can do). The second check is important
375 		 * in case the last instructions of a function make a tail-
376 		 * call to the function located immediately subsequent.
377 		 */
378 		if (rp->r_npc == rp->r_pc + 4 &&
379 		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
380 			continue;
381 
382 		/*
383 		 * The first argument is the offset of return tracepoint
384 		 * in the function; the remaining arguments are the return
385 		 * values.
386 		 *
387 		 * If fake_restore is set, we need to pull the return values
388 		 * out of the %i's rather than the %o's -- a little trickier.
389 		 */
390 		if (!fake_restore) {
391 			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
392 			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
393 		} else {
394 			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
395 			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
396 			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
397 			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
398 
399 			cookie = dtrace_interrupt_disable();
400 			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
401 			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
402 			    arg0, arg1, arg2, arg3);
403 			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
404 			dtrace_interrupt_enable(cookie);
405 		}
406 	}
407 
408 	mutex_exit(pid_mtx);
409 }
410 
411 int
412 fasttrap_pid_probe(struct regs *rp)
413 {
414 	proc_t *p = curproc;
415 	fasttrap_tracepoint_t *tp, tp_local;
416 	fasttrap_id_t *id;
417 	pid_t pid;
418 	uintptr_t pc = rp->r_pc;
419 	uintptr_t npc = rp->r_npc;
420 	uintptr_t orig_pc = pc;
421 	fasttrap_bucket_t *bucket;
422 	kmutex_t *pid_mtx;
423 	uint_t fake_restore = 0, is_enabled = 0;
424 	dtrace_icookie_t cookie;
425 
426 	/*
427 	 * It's possible that a user (in a veritable orgy of bad planning)
428 	 * could redirect this thread's flow of control before it reached the
429 	 * return probe fasttrap. In this case we need to kill the process
430 	 * since it's in a unrecoverable state.
431 	 */
432 	if (curthread->t_dtrace_step) {
433 		ASSERT(curthread->t_dtrace_on);
434 		fasttrap_sigtrap(p, curthread, pc);
435 		return (0);
436 	}
437 
438 	/*
439 	 * Clear all user tracing flags.
440 	 */
441 	curthread->t_dtrace_ft = 0;
442 	curthread->t_dtrace_pc = 0;
443 	curthread->t_dtrace_npc = 0;
444 	curthread->t_dtrace_scrpc = 0;
445 	curthread->t_dtrace_astpc = 0;
446 
447 	/*
448 	 * Treat a child created by a call to vfork(2) as if it were its
449 	 * parent. We know that there's only one thread of control in such a
450 	 * process: this one.
451 	 */
452 	while (p->p_flag & SVFORK) {
453 		p = p->p_parent;
454 	}
455 
456 	pid = p->p_pid;
457 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
458 	mutex_enter(pid_mtx);
459 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
460 
461 	/*
462 	 * Lookup the tracepoint that the process just hit.
463 	 */
464 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
465 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
466 		    tp->ftt_proc->ftpc_acount != 0)
467 			break;
468 	}
469 
470 	/*
471 	 * If we couldn't find a matching tracepoint, either a tracepoint has
472 	 * been inserted without using the pid<pid> ioctl interface (see
473 	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
474 	 */
475 	if (tp == NULL) {
476 		mutex_exit(pid_mtx);
477 		return (-1);
478 	}
479 
480 	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
481 		fasttrap_probe_t *probe = id->fti_probe;
482 		int isentry = (id->fti_ptype == DTFTP_ENTRY);
483 
484 		if (id->fti_ptype == DTFTP_IS_ENABLED) {
485 			is_enabled = 1;
486 			continue;
487 		}
488 
489 		/*
490 		 * We note that this was an entry probe to help ustack() find
491 		 * the first caller.
492 		 */
493 		if (isentry) {
494 			cookie = dtrace_interrupt_disable();
495 			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
496 		}
497 		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
498 		    rp->r_o3, rp->r_o4);
499 		if (isentry) {
500 			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
501 			dtrace_interrupt_enable(cookie);
502 		}
503 	}
504 
505 	/*
506 	 * We're about to do a bunch of work so we cache a local copy of
507 	 * the tracepoint to emulate the instruction, and then find the
508 	 * tracepoint again later if we need to light up any return probes.
509 	 */
510 	tp_local = *tp;
511 	mutex_exit(pid_mtx);
512 	tp = &tp_local;
513 
514 	/*
515 	 * If there's an is-enabled probe conntected to this tracepoint it
516 	 * means that there was a 'mov %g0, %o0' instruction that was placed
517 	 * there by DTrace when the binary was linked. As this probe is, in
518 	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
519 	 * bypass all the instruction emulation logic since we know the
520 	 * inevitable result. It's possible that a user could construct a
521 	 * scenario where the 'is-enabled' probe was on some other
522 	 * instruction, but that would be a rather exotic way to shoot oneself
523 	 * in the foot.
524 	 */
525 	if (is_enabled) {
526 		rp->r_o0 = 1;
527 		pc = rp->r_npc;
528 		npc = pc + 4;
529 		goto done;
530 	}
531 
532 	/*
533 	 * We emulate certain types of instructions to ensure correctness
534 	 * (in the case of position dependent instructions) or optimize
535 	 * common cases. The rest we have the thread execute back in user-
536 	 * land.
537 	 */
538 	switch (tp->ftt_type) {
539 	case FASTTRAP_T_SAVE:
540 	{
541 		int32_t imm;
542 
543 		/*
544 		 * This an optimization to let us handle function entry
545 		 * probes more efficiently. Many functions begin with a save
546 		 * instruction that follows the pattern:
547 		 *	save	%sp, <imm>, %sp
548 		 *
549 		 * Meanwhile, we've stashed the instruction:
550 		 *	save	%g1, %g0, %sp
551 		 *
552 		 * off of %g7, so all we have to do is stick the right value
553 		 * into %g1 and reset %pc to point to the instruction we've
554 		 * cleverly hidden (%npc should not be touched).
555 		 */
556 
557 		imm = tp->ftt_instr << 19;
558 		imm >>= 19;
559 		rp->r_g1 = rp->r_sp + imm;
560 		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
561 		break;
562 	}
563 
564 	case FASTTRAP_T_RESTORE:
565 	{
566 		ulong_t value;
567 		uint_t rd;
568 
569 		/*
570 		 * This is an optimization to let us handle function
571 		 * return probes more efficiently. Most non-leaf functions
572 		 * end with the sequence:
573 		 *	ret
574 		 *	restore	<reg>, <reg_or_imm>, %oX
575 		 *
576 		 * We've stashed the instruction:
577 		 *	restore	%g0, %g0, %g0
578 		 *
579 		 * off of %g7 so we just need to place the correct value
580 		 * in the right %i register (since after our fake-o
581 		 * restore, the %i's will become the %o's) and set the %pc
582 		 * to point to our hidden restore. We also set fake_restore to
583 		 * let fasttrap_return_common() know that it will find the
584 		 * return values in the %i's rather than the %o's.
585 		 */
586 
587 		if (I(tp->ftt_instr)) {
588 			int32_t imm;
589 
590 			imm = tp->ftt_instr << 19;
591 			imm >>= 19;
592 			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
593 		} else {
594 			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
595 			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
596 		}
597 
598 		/*
599 		 * Convert %o's to %i's; leave %g's as they are.
600 		 */
601 		rd = RD(tp->ftt_instr);
602 		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);
603 
604 		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
605 		fake_restore = 1;
606 		break;
607 	}
608 
609 	case FASTTRAP_T_RETURN:
610 	{
611 		uintptr_t target;
612 
613 		/*
614 		 * A return instruction is like a jmpl (without the link
615 		 * part) that executes an implicit restore. We've stashed
616 		 * the instruction:
617 		 *	return %o0
618 		 *
619 		 * off of %g7 so we just need to place the target in %o0
620 		 * and set the %pc to point to the stashed return instruction.
621 		 * We use %o0 since that register disappears after the return
622 		 * executes, erasing any evidence of this tampering.
623 		 */
624 		if (I(tp->ftt_instr)) {
625 			int32_t imm;
626 
627 			imm = tp->ftt_instr << 19;
628 			imm >>= 19;
629 			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
630 		} else {
631 			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
632 			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
633 		}
634 
635 		fasttrap_putreg(rp, R_O0, target);
636 
637 		pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
638 		fake_restore = 1;
639 		break;
640 	}
641 
642 	case FASTTRAP_T_OR:
643 	{
644 		ulong_t value;
645 
646 		if (I(tp->ftt_instr)) {
647 			int32_t imm;
648 
649 			imm = tp->ftt_instr << 19;
650 			imm >>= 19;
651 			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
652 		} else {
653 			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
654 			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
655 		}
656 
657 		fasttrap_putreg(rp, RD(tp->ftt_instr), value);
658 		pc = rp->r_npc;
659 		npc = pc + 4;
660 		break;
661 	}
662 
663 	case FASTTRAP_T_SETHI:
664 		if (RD(tp->ftt_instr) != R_G0) {
665 			uint32_t imm32 = tp->ftt_instr << 10;
666 			fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
667 		}
668 		pc = rp->r_npc;
669 		npc = pc + 4;
670 		break;
671 
672 	case FASTTRAP_T_CCR:
673 	{
674 		uint_t c, v, z, n, taken;
675 		uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;
676 
677 		if (tp->ftt_cc != 0)
678 			ccr >>= 4;
679 
680 		c = (ccr >> 0) & 1;
681 		v = (ccr >> 1) & 1;
682 		z = (ccr >> 2) & 1;
683 		n = (ccr >> 3) & 1;
684 
685 		switch (tp->ftt_code) {
686 		case 0x0:	/* BN */
687 			taken = 0;		break;
688 		case 0x1:	/* BE */
689 			taken = z;		break;
690 		case 0x2:	/* BLE */
691 			taken = z | (n ^ v);	break;
692 		case 0x3:	/* BL */
693 			taken = n ^ v;		break;
694 		case 0x4:	/* BLEU */
695 			taken = c | z;		break;
696 		case 0x5:	/* BCS (BLU) */
697 			taken = c;		break;
698 		case 0x6:	/* BNEG */
699 			taken = n;		break;
700 		case 0x7:	/* BVS */
701 			taken = v;		break;
702 		case 0x8:	/* BA */
703 			/*
704 			 * We handle the BA case differently since the annul
705 			 * bit means something slightly different.
706 			 */
707 			panic("fasttrap: mishandled a branch");
708 			taken = 1;		break;
709 		case 0x9:	/* BNE */
710 			taken = ~z;		break;
711 		case 0xa:	/* BG */
712 			taken = ~(z | (n ^ v));	break;
713 		case 0xb:	/* BGE */
714 			taken = ~(n ^ v);	break;
715 		case 0xc:	/* BGU */
716 			taken = ~(c | z);	break;
717 		case 0xd:	/* BCC (BGEU) */
718 			taken = ~c;		break;
719 		case 0xe:	/* BPOS */
720 			taken = ~n;		break;
721 		case 0xf:	/* BVC */
722 			taken = ~v;		break;
723 		}
724 
725 		if (taken & 1) {
726 			pc = rp->r_npc;
727 			npc = tp->ftt_dest;
728 		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
729 			/*
730 			 * Untaken annulled branches don't execute the
731 			 * instruction in the delay slot.
732 			 */
733 			pc = rp->r_npc + 4;
734 			npc = pc + 4;
735 		} else {
736 			pc = rp->r_npc;
737 			npc = pc + 4;
738 		}
739 		break;
740 	}
741 
742 	case FASTTRAP_T_FCC:
743 	{
744 		uint_t fcc;
745 		uint_t taken;
746 		uint64_t fsr;
747 
748 		dtrace_getfsr(&fsr);
749 
750 		if (tp->ftt_cc == 0) {
751 			fcc = (fsr >> 10) & 0x3;
752 		} else {
753 			uint_t shift;
754 			ASSERT(tp->ftt_cc <= 3);
755 			shift = 30 + tp->ftt_cc * 2;
756 			fcc = (fsr >> shift) & 0x3;
757 		}
758 
759 		switch (tp->ftt_code) {
760 		case 0x0:	/* FBN */
761 			taken = (1 << fcc) & (0|0|0|0);	break;
762 		case 0x1:	/* FBNE */
763 			taken = (1 << fcc) & (8|4|2|0);	break;
764 		case 0x2:	/* FBLG */
765 			taken = (1 << fcc) & (0|4|2|0);	break;
766 		case 0x3:	/* FBUL */
767 			taken = (1 << fcc) & (8|0|2|0);	break;
768 		case 0x4:	/* FBL */
769 			taken = (1 << fcc) & (0|0|2|0);	break;
770 		case 0x5:	/* FBUG */
771 			taken = (1 << fcc) & (8|4|0|0);	break;
772 		case 0x6:	/* FBG */
773 			taken = (1 << fcc) & (0|4|0|0);	break;
774 		case 0x7:	/* FBU */
775 			taken = (1 << fcc) & (8|0|0|0);	break;
776 		case 0x8:	/* FBA */
777 			/*
778 			 * We handle the FBA case differently since the annul
779 			 * bit means something slightly different.
780 			 */
781 			panic("fasttrap: mishandled a branch");
782 			taken = (1 << fcc) & (8|4|2|1);	break;
783 		case 0x9:	/* FBE */
784 			taken = (1 << fcc) & (0|0|0|1);	break;
785 		case 0xa:	/* FBUE */
786 			taken = (1 << fcc) & (8|0|0|1);	break;
787 		case 0xb:	/* FBGE */
788 			taken = (1 << fcc) & (0|4|0|1);	break;
789 		case 0xc:	/* FBUGE */
790 			taken = (1 << fcc) & (8|4|0|1);	break;
791 		case 0xd:	/* FBLE */
792 			taken = (1 << fcc) & (0|0|2|1);	break;
793 		case 0xe:	/* FBULE */
794 			taken = (1 << fcc) & (8|0|2|1);	break;
795 		case 0xf:	/* FBO */
796 			taken = (1 << fcc) & (0|4|2|1);	break;
797 		}
798 
799 		if (taken) {
800 			pc = rp->r_npc;
801 			npc = tp->ftt_dest;
802 		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
803 			/*
804 			 * Untaken annulled branches don't execute the
805 			 * instruction in the delay slot.
806 			 */
807 			pc = rp->r_npc + 4;
808 			npc = pc + 4;
809 		} else {
810 			pc = rp->r_npc;
811 			npc = pc + 4;
812 		}
813 		break;
814 	}
815 
816 	case FASTTRAP_T_REG:
817 	{
818 		int64_t value;
819 		uint_t taken;
820 		uint_t reg = RS1(tp->ftt_instr);
821 
822 		/*
823 		 * An ILP32 process shouldn't be using a branch predicated on
824 		 * an %i or an %l since it would violate the ABI. It's a
825 		 * violation of the ABI because we can't ensure deterministic
826 		 * behavior. We should have identified this case when we
827 		 * enabled the probe.
828 		 */
829 		ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);
830 
831 		value = (int64_t)fasttrap_getreg(rp, reg);
832 
833 		switch (tp->ftt_code) {
834 		case 0x1:	/* BRZ */
835 			taken = (value == 0);	break;
836 		case 0x2:	/* BRLEZ */
837 			taken = (value <= 0);	break;
838 		case 0x3:	/* BRLZ */
839 			taken = (value < 0);	break;
840 		case 0x5:	/* BRNZ */
841 			taken = (value != 0);	break;
842 		case 0x6:	/* BRGZ */
843 			taken = (value > 0);	break;
844 		case 0x7:	/* BRGEZ */
845 			taken = (value >= 0);	break;
846 		default:
847 		case 0x0:
848 		case 0x4:
849 			panic("fasttrap: mishandled a branch");
850 		}
851 
852 		if (taken) {
853 			pc = rp->r_npc;
854 			npc = tp->ftt_dest;
855 		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
856 			/*
857 			 * Untaken annulled branches don't execute the
858 			 * instruction in the delay slot.
859 			 */
860 			pc = rp->r_npc + 4;
861 			npc = pc + 4;
862 		} else {
863 			pc = rp->r_npc;
864 			npc = pc + 4;
865 		}
866 		break;
867 	}
868 
869 	case FASTTRAP_T_ALWAYS:
870 		/*
871 		 * BAs, BA,As...
872 		 */
873 
874 		if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
875 			/*
876 			 * Annulled branch always instructions never execute
877 			 * the instruction in the delay slot.
878 			 */
879 			pc = tp->ftt_dest;
880 			npc = tp->ftt_dest + 4;
881 		} else {
882 			pc = rp->r_npc;
883 			npc = tp->ftt_dest;
884 		}
885 		break;
886 
887 	case FASTTRAP_T_RDPC:
888 		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
889 		pc = rp->r_npc;
890 		npc = pc + 4;
891 		break;
892 
893 	case FASTTRAP_T_CALL:
894 		/*
895 		 * It's a call _and_ link remember...
896 		 */
897 		rp->r_o7 = rp->r_pc;
898 		pc = rp->r_npc;
899 		npc = tp->ftt_dest;
900 		break;
901 
902 	case FASTTRAP_T_JMPL:
903 		pc = rp->r_npc;
904 
905 		if (I(tp->ftt_instr)) {
906 			uint_t rs1 = RS1(tp->ftt_instr);
907 			int32_t imm;
908 
909 			imm = tp->ftt_instr << 19;
910 			imm >>= 19;
911 			npc = fasttrap_getreg(rp, rs1) + imm;
912 		} else {
913 			uint_t rs1 = RS1(tp->ftt_instr);
914 			uint_t rs2 = RS2(tp->ftt_instr);
915 
916 			npc = fasttrap_getreg(rp, rs1) +
917 			    fasttrap_getreg(rp, rs2);
918 		}
919 
920 		/*
921 		 * Do the link part of the jump-and-link instruction.
922 		 */
923 		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
924 
925 		break;
926 
927 	case FASTTRAP_T_COMMON:
928 	{
929 		curthread->t_dtrace_scrpc = rp->r_g7;
930 		curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;
931 
932 		/*
933 		 * Copy the instruction to a reserved location in the
934 		 * user-land thread structure, then set the PC to that
935 		 * location and leave the NPC alone. We take pains to ensure
936 		 * consistency in the instruction stream (See SPARC
937 		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
938 		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
939 		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
940 		 * instruction into the user's address space without
941 		 * bypassing the I$. There's no AS_USER version of this ASI
942 		 * (as exist for other ASIs) so we use the lofault
943 		 * mechanism to catch faults.
944 		 */
945 		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
946 			/*
947 			 * If the copyout fails, then the process's state
948 			 * is not consistent (the effects of the traced
949 			 * instruction will never be seen). This process
950 			 * cannot be allowed to continue execution.
951 			 */
952 			fasttrap_sigtrap(curproc, curthread, pc);
953 			return (0);
954 		}
955 
956 		curthread->t_dtrace_pc = pc;
957 		curthread->t_dtrace_npc = npc;
958 		curthread->t_dtrace_on = 1;
959 
960 		pc = curthread->t_dtrace_scrpc;
961 
962 		if (tp->ftt_retids != NULL) {
963 			curthread->t_dtrace_step = 1;
964 			curthread->t_dtrace_ret = 1;
965 			npc = curthread->t_dtrace_astpc;
966 		}
967 		break;
968 	}
969 
970 	default:
971 		panic("fasttrap: mishandled an instruction");
972 	}
973 
974 	/*
975 	 * This bit me in the ass a couple of times, so lets toss this
976 	 * in as a cursory sanity check.
977 	 */
978 	ASSERT(pc != rp->r_g7 + 4);
979 	ASSERT(pc != rp->r_g7 + 8);
980 
981 done:
982 	/*
983 	 * If there were no return probes when we first found the tracepoint,
984 	 * we should feel no obligation to honor any return probes that were
985 	 * subsequently enabled -- they'll just have to wait until the next
986 	 * time around.
987 	 */
988 	if (tp->ftt_retids != NULL) {
989 		/*
990 		 * We need to wait until the results of the instruction are
991 		 * apparent before invoking any return probes. If this
992 		 * instruction was emulated we can just call
993 		 * fasttrap_return_common(); if it needs to be executed, we
994 		 * need to wait until we return to the kernel.
995 		 */
996 		if (tp->ftt_type != FASTTRAP_T_COMMON) {
997 			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
998 		} else {
999 			ASSERT(curthread->t_dtrace_ret != 0);
1000 			ASSERT(curthread->t_dtrace_pc == orig_pc);
1001 			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
1002 			ASSERT(npc == curthread->t_dtrace_astpc);
1003 		}
1004 	}
1005 
1006 	ASSERT(pc != 0);
1007 	rp->r_pc = pc;
1008 	rp->r_npc = npc;
1009 
1010 	return (0);
1011 }
1012 
1013 int
1014 fasttrap_return_probe(struct regs *rp)
1015 {
1016 	proc_t *p = ttoproc(curthread);
1017 	pid_t pid;
1018 	uintptr_t pc = curthread->t_dtrace_pc;
1019 	uintptr_t npc = curthread->t_dtrace_npc;
1020 
1021 	curthread->t_dtrace_pc = 0;
1022 	curthread->t_dtrace_npc = 0;
1023 	curthread->t_dtrace_scrpc = 0;
1024 	curthread->t_dtrace_astpc = 0;
1025 
1026 	/*
1027 	 * Treat a child created by a call to vfork(2) as if it were its
1028 	 * parent. We know there's only one thread of control in such a
1029 	 * process: this one.
1030 	 */
1031 	while (p->p_flag & SVFORK) {
1032 		p = p->p_parent;
1033 	}
1034 
1035 	/*
1036 	 * We set the %pc and %npc to their values when the traced
1037 	 * instruction was initially executed so that it appears to
1038 	 * dtrace_probe() that we're on the original instruction, and so that
1039 	 * the user can't easily detect our complex web of lies.
1040 	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
1041 	 * after we return.
1042 	 */
1043 	rp->r_pc = pc;
1044 	rp->r_npc = npc;
1045 
1046 	pid = p->p_pid;
1047 	fasttrap_return_common(rp, pc, pid, 0);
1048 
1049 	return (0);
1050 }
1051 
1052 int
1053 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
1054 {
1055 	fasttrap_instr_t instr = FASTTRAP_INSTR;
1056 
1057 	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
1058 		return (-1);
1059 
1060 	return (0);
1061 }
1062 
1063 int
1064 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
1065 {
1066 	fasttrap_instr_t instr;
1067 
1068 	/*
1069 	 * Distinguish between read or write failures and a changed
1070 	 * instruction.
1071 	 */
1072 	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
1073 		return (0);
1074 	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
1075 		return (0);
1076 	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
1077 		return (-1);
1078 
1079 	return (0);
1080 }
1081 
1082 int
1083 fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
1084     fasttrap_probe_type_t type)
1085 {
1086 	uint32_t instr;
1087 	int32_t disp;
1088 
1089 	/*
1090 	 * Read the instruction at the given address out of the process's
1091 	 * address space. We don't have to worry about a debugger
1092 	 * changing this instruction before we overwrite it with our trap
1093 	 * instruction since P_PR_LOCK is set.
1094 	 */
1095 	if (uread(p, &instr, 4, pc) != 0)
1096 		return (-1);
1097 
1098 	/*
1099 	 * Decode the instruction to fill in the probe flags. We can have
1100 	 * the process execute most instructions on its own using a pc/npc
1101 	 * trick, but pc-relative control transfer present a problem since
1102 	 * we're relocating the instruction. We emulate these instructions
1103 	 * in the kernel. We assume a default type and over-write that as
1104 	 * needed.
1105 	 *
1106 	 * pc-relative instructions must be emulated for correctness;
1107 	 * other instructions (which represent a large set of commonly traced
1108 	 * instructions) are emulated or otherwise optimized for performance.
1109 	 */
1110 	tp->ftt_type = FASTTRAP_T_COMMON;
1111 	if (OP(instr) == 1) {
1112 		/*
1113 		 * Call instructions.
1114 		 */
1115 		tp->ftt_type = FASTTRAP_T_CALL;
1116 		disp = DISP30(instr) << 2;
1117 		tp->ftt_dest = pc + (intptr_t)disp;
1118 
1119 	} else if (OP(instr) == 0) {
1120 		/*
1121 		 * Branch instructions.
1122 		 *
1123 		 * Unconditional branches need careful attention when they're
1124 		 * annulled: annulled unconditional branches never execute
1125 		 * the instruction in the delay slot.
1126 		 */
1127 		switch (OP2(instr)) {
1128 		case OP2_ILLTRAP:
1129 		case 0x7:
1130 			/*
1131 			 * The compiler may place an illtrap after a call to
1132 			 * a function that returns a structure. In the case of
1133 			 * a returned structure, the compiler places an illtrap
1134 			 * whose const22 field is the size of the returned
1135 			 * structure immediately following the delay slot of
1136 			 * the call. To stay out of the way, we refuse to
1137 			 * place tracepoints on top of illtrap instructions.
1138 			 *
1139 			 * This is one of the dumbest architectural decisions
1140 			 * I've ever had to work around.
1141 			 *
1142 			 * We also identify the only illegal op2 value (See
1143 			 * SPARC Architecture Manual Version 9, E.2 table 31).
1144 			 */
1145 			return (-1);
1146 
1147 		case OP2_BPcc:
1148 			if (COND(instr) == 8) {
1149 				tp->ftt_type = FASTTRAP_T_ALWAYS;
1150 			} else {
1151 				/*
1152 				 * Check for an illegal instruction.
1153 				 */
1154 				if (CC(instr) & 1)
1155 					return (-1);
1156 				tp->ftt_type = FASTTRAP_T_CCR;
1157 				tp->ftt_cc = CC(instr);
1158 				tp->ftt_code = COND(instr);
1159 			}
1160 
1161 			if (A(instr) != 0)
1162 				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1163 
1164 			disp = DISP19(instr);
1165 			disp <<= 13;
1166 			disp >>= 11;
1167 			tp->ftt_dest = pc + (intptr_t)disp;
1168 			break;
1169 
1170 		case OP2_Bicc:
1171 			if (COND(instr) == 8) {
1172 				tp->ftt_type = FASTTRAP_T_ALWAYS;
1173 			} else {
1174 				tp->ftt_type = FASTTRAP_T_CCR;
1175 				tp->ftt_cc = 0;
1176 				tp->ftt_code = COND(instr);
1177 			}
1178 
1179 			if (A(instr) != 0)
1180 				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1181 
1182 			disp = DISP22(instr);
1183 			disp <<= 10;
1184 			disp >>= 8;
1185 			tp->ftt_dest = pc + (intptr_t)disp;
1186 			break;
1187 
1188 		case OP2_BPr:
1189 			/*
1190 			 * Check for an illegal instruction.
1191 			 */
1192 			if ((RCOND(instr) & 3) == 0)
1193 				return (-1);
1194 
1195 			/*
1196 			 * It's a violation of the v8plus ABI to use a
1197 			 * register-predicated branch in a 32-bit app if
1198 			 * the register used is an %l or an %i (%gs and %os
1199 			 * are legit because they're not saved to the stack
1200 			 * in 32-bit words when we take a trap).
1201 			 */
1202 			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
1203 				return (-1);
1204 
1205 			tp->ftt_type = FASTTRAP_T_REG;
1206 			if (A(instr) != 0)
1207 				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1208 			disp = DISP16(instr);
1209 			disp <<= 16;
1210 			disp >>= 14;
1211 			tp->ftt_dest = pc + (intptr_t)disp;
1212 			tp->ftt_code = RCOND(instr);
1213 			break;
1214 
1215 		case OP2_SETHI:
1216 			tp->ftt_type = FASTTRAP_T_SETHI;
1217 			break;
1218 
1219 		case OP2_FBPfcc:
1220 			if (COND(instr) == 8) {
1221 				tp->ftt_type = FASTTRAP_T_ALWAYS;
1222 			} else {
1223 				tp->ftt_type = FASTTRAP_T_FCC;
1224 				tp->ftt_cc = CC(instr);
1225 				tp->ftt_code = COND(instr);
1226 			}
1227 
1228 			if (A(instr) != 0)
1229 				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1230 
1231 			disp = DISP19(instr);
1232 			disp <<= 13;
1233 			disp >>= 11;
1234 			tp->ftt_dest = pc + (intptr_t)disp;
1235 			break;
1236 
1237 		case OP2_FBfcc:
1238 			if (COND(instr) == 8) {
1239 				tp->ftt_type = FASTTRAP_T_ALWAYS;
1240 			} else {
1241 				tp->ftt_type = FASTTRAP_T_FCC;
1242 				tp->ftt_cc = 0;
1243 				tp->ftt_code = COND(instr);
1244 			}
1245 
1246 			if (A(instr) != 0)
1247 				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1248 
1249 			disp = DISP22(instr);
1250 			disp <<= 10;
1251 			disp >>= 8;
1252 			tp->ftt_dest = pc + (intptr_t)disp;
1253 			break;
1254 		}
1255 
1256 	} else if (OP(instr) == 2) {
1257 		switch (OP3(instr)) {
1258 		case OP3_RETURN:
1259 			tp->ftt_type = FASTTRAP_T_RETURN;
1260 			break;
1261 
1262 		case OP3_JMPL:
1263 			tp->ftt_type = FASTTRAP_T_JMPL;
1264 			break;
1265 
1266 		case OP3_RD:
1267 			if (RS1(instr) == 5)
1268 				tp->ftt_type = FASTTRAP_T_RDPC;
1269 			break;
1270 
1271 		case OP3_SAVE:
1272 			/*
1273 			 * We optimize for save instructions at function
1274 			 * entry; see the comment in fasttrap_pid_probe()
1275 			 * (near FASTTRAP_T_SAVE) for details.
1276 			 */
1277 			if (fasttrap_optimize_save != 0 &&
1278 			    type == DTFTP_ENTRY &&
1279 			    I(instr) == 1 && RD(instr) == R_SP)
1280 				tp->ftt_type = FASTTRAP_T_SAVE;
1281 			break;
1282 
1283 		case OP3_RESTORE:
1284 			/*
1285 			 * We optimize restore instructions at function
1286 			 * return; see the comment in fasttrap_pid_probe()
1287 			 * (near FASTTRAP_T_RESTORE) for details.
1288 			 *
1289 			 * rd must be an %o or %g register.
1290 			 */
1291 			if ((RD(instr) & 0x10) == 0)
1292 				tp->ftt_type = FASTTRAP_T_RESTORE;
1293 			break;
1294 
1295 		case OP3_OR:
1296 			/*
1297 			 * A large proportion of instructions in the delay
1298 			 * slot of retl instructions are or's so we emulate
1299 			 * these downstairs as an optimization.
1300 			 */
1301 			tp->ftt_type = FASTTRAP_T_OR;
1302 			break;
1303 
1304 		case OP3_TCC:
1305 			/*
1306 			 * Breakpoint instructions are effectively position-
1307 			 * dependent since the debugger uses the %pc value
1308 			 * to lookup which breakpoint was executed. As a
1309 			 * result, we can't actually instrument breakpoints.
1310 			 */
1311 			if (SW_TRAP(instr) == ST_BREAKPOINT)
1312 				return (-1);
1313 			break;
1314 
1315 		case 0x19:
1316 		case 0x1d:
1317 		case 0x29:
1318 		case 0x33:
1319 		case 0x3f:
1320 			/*
1321 			 * Identify illegal instructions (See SPARC
1322 			 * Architecture Manual Version 9, E.2 table 32).
1323 			 */
1324 			return (-1);
1325 		}
1326 	} else if (OP(instr) == 3) {
1327 		uint32_t op3 = OP3(instr);
1328 
1329 		/*
1330 		 * Identify illegal instructions (See SPARC Architecture
1331 		 * Manual Version 9, E.2 table 33).
1332 		 */
1333 		if ((op3 & 0x28) == 0x28) {
1334 			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
1335 			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
1336 				return (-1);
1337 		} else {
1338 			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
1339 				return (-1);
1340 		}
1341 	}
1342 
1343 	tp->ftt_instr = instr;
1344 
1345 	/*
1346 	 * We don't know how this tracepoint is going to be used, but in case
1347 	 * it's used as part of a function return probe, we need to indicate
1348 	 * whether it's always a return site or only potentially a return
1349 	 * site. If it's part of a return probe, it's always going to be a
1350 	 * return from that function if it's a restore instruction or if
1351 	 * the previous instruction was a return. If we could reliably
1352 	 * distinguish jump tables from return sites, this wouldn't be
1353 	 * necessary.
1354 	 */
1355 	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
1356 	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
1357 	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
1358 		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;
1359 
1360 	return (0);
1361 }
1362 
1363 /*ARGSUSED*/
1364 uint64_t
1365 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1366     int aframes)
1367 {
1368 	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1369 }
1370 
1371 /*ARGSUSED*/
1372 uint64_t
1373 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1374     int aframes)
1375 {
1376 	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1377 }
1378 
1379 static uint64_t fasttrap_getreg_fast_cnt;
1380 static uint64_t fasttrap_getreg_mpcb_cnt;
1381 static uint64_t fasttrap_getreg_slow_cnt;
1382 
1383 static ulong_t
1384 fasttrap_getreg(struct regs *rp, uint_t reg)
1385 {
1386 	ulong_t value;
1387 	dtrace_icookie_t cookie;
1388 	struct machpcb *mpcb;
1389 	extern ulong_t dtrace_getreg_win(uint_t, uint_t);
1390 
1391 	/*
1392 	 * We have the %os and %gs in our struct regs, but if we need to
1393 	 * snag a %l or %i we need to go scrounging around in the process's
1394 	 * address space.
1395 	 */
1396 	if (reg == 0)
1397 		return (0);
1398 
1399 	if (reg < 16)
1400 		return ((&rp->r_g1)[reg - 1]);
1401 
1402 	/*
1403 	 * Before we look at the user's stack, we'll check the register
1404 	 * windows to see if the information we want is in there.
1405 	 */
1406 	cookie = dtrace_interrupt_disable();
1407 	if (dtrace_getotherwin() > 0) {
1408 		value = dtrace_getreg_win(reg, 1);
1409 		dtrace_interrupt_enable(cookie);
1410 
1411 		atomic_inc_64(&fasttrap_getreg_fast_cnt);
1412 
1413 		return (value);
1414 	}
1415 	dtrace_interrupt_enable(cookie);
1416 
1417 	/*
1418 	 * First check the machpcb structure to see if we've already read
1419 	 * in the register window we're looking for; if we haven't, (and
1420 	 * we probably haven't) try to copy in the value of the register.
1421 	 */
1422 	/* LINTED - alignment */
1423 	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1424 
1425 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1426 		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1427 
1428 		if (mpcb->mpcb_wbcnt > 0) {
1429 			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
1430 			int i = mpcb->mpcb_wbcnt;
1431 			do {
1432 				i--;
1433 				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1434 					continue;
1435 
1436 				atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
1437 				return (rwin[i].rw_local[reg - 16]);
1438 			} while (i > 0);
1439 		}
1440 
1441 		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
1442 			goto err;
1443 	} else {
1444 		struct frame32 *fr =
1445 		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1446 		uint32_t *v32 = (uint32_t *)&value;
1447 
1448 		if (mpcb->mpcb_wbcnt > 0) {
1449 			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
1450 			int i = mpcb->mpcb_wbcnt;
1451 			do {
1452 				i--;
1453 				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1454 					continue;
1455 
1456 				atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
1457 				return (rwin[i].rw_local[reg - 16]);
1458 			} while (i > 0);
1459 		}
1460 
1461 		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
1462 			goto err;
1463 
1464 		v32[0] = 0;
1465 	}
1466 
1467 	atomic_inc_64(&fasttrap_getreg_slow_cnt);
1468 	return (value);
1469 
1470 err:
1471 	/*
1472 	 * If the copy in failed, the process will be in a irrecoverable
1473 	 * state, and we have no choice but to kill it.
1474 	 */
1475 	psignal(ttoproc(curthread), SIGILL);
1476 	return (0);
1477 }
1478 
1479 static uint64_t fasttrap_putreg_fast_cnt;
1480 static uint64_t fasttrap_putreg_mpcb_cnt;
1481 static uint64_t fasttrap_putreg_slow_cnt;
1482 
1483 static void
1484 fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
1485 {
1486 	dtrace_icookie_t cookie;
1487 	struct machpcb *mpcb;
1488 	extern void dtrace_putreg_win(uint_t, ulong_t);
1489 
1490 	if (reg == 0)
1491 		return;
1492 
1493 	if (reg < 16) {
1494 		(&rp->r_g1)[reg - 1] = value;
1495 		return;
1496 	}
1497 
1498 	/*
1499 	 * If the user process is still using some register windows, we
1500 	 * can just place the value in the correct window.
1501 	 */
1502 	cookie = dtrace_interrupt_disable();
1503 	if (dtrace_getotherwin() > 0) {
1504 		dtrace_putreg_win(reg, value);
1505 		dtrace_interrupt_enable(cookie);
1506 		atomic_inc_64(&fasttrap_putreg_fast_cnt);
1507 		return;
1508 	}
1509 	dtrace_interrupt_enable(cookie);
1510 
1511 	/*
1512 	 * First see if there's a copy of the register window in the
1513 	 * machpcb structure that we can modify; if there isn't try to
1514 	 * copy out the value. If that fails, we try to create a new
1515 	 * register window in the machpcb structure. While this isn't
1516 	 * _precisely_ the intended use of the machpcb structure, it
1517 	 * can't cause any problems since we know at this point in the
1518 	 * code that all of the user's data have been flushed out of the
1519 	 * register file (since %otherwin is 0).
1520 	 */
1521 	/* LINTED - alignment */
1522 	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1523 
1524 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1525 		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1526 		/* LINTED - alignment */
1527 		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;
1528 
1529 		if (mpcb->mpcb_wbcnt > 0) {
1530 			int i = mpcb->mpcb_wbcnt;
1531 			do {
1532 				i--;
1533 				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1534 					continue;
1535 
1536 				rwin[i].rw_local[reg - 16] = value;
1537 				atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1538 				return;
1539 			} while (i > 0);
1540 		}
1541 
1542 		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
1543 			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1544 			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1545 				goto err;
1546 
1547 			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
1548 			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1549 			mpcb->mpcb_wbcnt++;
1550 			atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1551 			return;
1552 		}
1553 	} else {
1554 		struct frame32 *fr =
1555 		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1556 		/* LINTED - alignment */
1557 		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
1558 		uint32_t v32 = (uint32_t)value;
1559 
1560 		if (mpcb->mpcb_wbcnt > 0) {
1561 			int i = mpcb->mpcb_wbcnt;
1562 			do {
1563 				i--;
1564 				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1565 					continue;
1566 
1567 				rwin[i].rw_local[reg - 16] = v32;
1568 				atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1569 				return;
1570 			} while (i > 0);
1571 		}
1572 
1573 		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
1574 			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1575 			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1576 				goto err;
1577 
1578 			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
1579 			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1580 			mpcb->mpcb_wbcnt++;
1581 			atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1582 			return;
1583 		}
1584 	}
1585 
1586 	atomic_inc_64(&fasttrap_putreg_slow_cnt);
1587 	return;
1588 
1589 err:
1590 	/*
1591 	 * If we couldn't record this register's value, the process is in an
1592 	 * irrecoverable state and we have no choice but to euthanize it.
1593 	 */
1594 	psignal(ttoproc(curthread), SIGILL);
1595 }
1596