xref: /titanic_44/usr/src/uts/sparc/dtrace/fasttrap_isa.c (revision 0bb073995ac5a95bd35f2dd790df1ea3d8c2d507)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/fasttrap_isa.h>
30 #include <sys/fasttrap_impl.h>
31 #include <sys/dtrace.h>
32 #include <sys/dtrace_impl.h>
33 #include <sys/cmn_err.h>
34 #include <sys/frame.h>
35 #include <sys/stack.h>
36 #include <sys/sysmacros.h>
37 #include <sys/trap.h>
38 
39 #include <v9/sys/machpcb.h>
40 #include <v9/sys/privregs.h>
41 
42 /*
43  * Lossless User-Land Tracing on SPARC
44  * -----------------------------------
45  *
46  * The Basic Idea
47  *
48  * The most important design constraint is, of course, correct execution of
49  * the user thread above all else. The next most important goal is rapid
50  * execution. We combine execution of instructions in user-land with
51  * emulation of certain instructions in the kernel to aim for complete
52  * correctness and maximal performance.
53  *
54  * We take advantage of the split PC/NPC architecture to speed up logical
55  * single-stepping; when we copy an instruction out to the scratch space in
56  * the ulwp_t structure (held in the %g7 register on SPARC), we can
57  * effectively single step by setting the PC to our scratch space and leaving
58  * the NPC alone. This executes the replaced instruction and then continues
59  * on without having to reenter the kernel as with single- stepping. The
60  * obvious caveat is for instructions whose execution is PC dependant --
61  * branches, call and link instructions (call and jmpl), and the rdpc
62  * instruction. These instructions cannot be executed in the manner described
63  * so they must be emulated in the kernel.
64  *
 * Emulation for this small set of instructions is fairly simple; the most
66  * difficult part being emulating branch conditions.
67  *
68  *
69  * A Cache Heavy Portfolio
70  *
71  * It's important to note at this time that copying an instruction out to the
72  * ulwp_t scratch space in user-land is rather complicated. SPARC has
73  * separate data and instruction caches so any writes to the D$ (using a
74  * store instruction for example) aren't necessarily reflected in the I$.
75  * The flush instruction can be used to synchronize the two and must be used
76  * for any self-modifying code, but the flush instruction only applies to the
77  * primary address space (the absence of a flusha analogue to the flush
78  * instruction that accepts an ASI argument is an obvious omission from SPARC
79  * v9 where the notion of the alternate address space was introduced on
80  * SPARC). To correctly copy out the instruction we must use a block store
81  * that doesn't allocate in the D$ and ensures synchronization with the I$;
82  * see dtrace_blksuword32() for the implementation  (this function uses
83  * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
84  * described). Refer to the UltraSPARC I/II manual for details on the
85  * ASI_BLK_COMMIT_S ASI.
86  *
87  *
88  * Return Subtleties
89  *
90  * When we're firing a return probe we need to expose the value returned by
91  * the function being traced. Since the function can set the return value
92  * in its last instruction, we need to fire the return probe only _after_
93  * the effects of the instruction are apparent. For instructions that we
94  * emulate, we can call dtrace_probe() after we've performed the emulation;
95  * for instructions that we execute after we return to user-land, we set
96  * %pc to the instruction we copied out (as described above) and set %npc
97  * to a trap instruction stashed in the ulwp_t structure. After the traced
98  * instruction is executed, the trap instruction returns control to the
99  * kernel where we can fire the return probe.
100  *
101  * This need for a second trap in cases where we execute the traced
102  * instruction makes it all the more important to emulate the most common
103  * instructions to avoid the second trip in and out of the kernel.
104  *
105  *
106  * Making it Fast
107  *
108  * Since copying out an instruction is neither simple nor inexpensive for the
109  * CPU, we should attempt to avoid doing it in as many cases as possible.
110  * Since function entry and return are usually the most interesting probe
111  * sites, we attempt to tune the performance of the fasttrap provider around
112  * instructions typically in those places.
113  *
114  * Looking at a bunch of functions in libraries and executables reveals that
115  * most functions begin with either a save or a sethi (to setup a larger
116  * argument to the save) and end with a restore or an or (in the case of leaf
117  * functions). To try to improve performance, we emulate all of these
118  * instructions in the kernel.
119  *
120  * The save and restore instructions are a little tricky since they perform
 * register window manipulation. Rather than trying to tinker with the
122  * register windows from the kernel, we emulate the implicit add that takes
123  * place as part of those instructions and set the %pc to point to a simple
124  * save or restore we've hidden in the ulwp_t structure. If we're in a return
125  * probe so want to make it seem as though the tracepoint has been completely
126  * executed we need to remember that we've pulled this trick with restore and
127  * pull registers from the previous window (the one that we'll switch to once
 * the simple restore instruction is executed) rather than the current one.
 * This is why in the case of emulating a restore we set the DTrace CPU flag
 * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
 * (see fasttrap_return_common()).
132  */
133 
/*
 * Instruction decoding.
 *
 * FT_BITS() extracts the n-bit field whose least significant bit is bit
 * `lo' of an instruction word; the accessors below give each field its
 * conventional name. OP() needs no mask since it selects the topmost bits
 * of a 32-bit word, and DISP16() stitches together a displacement that is
 * split across two non-adjacent fields (bits 21:20 and bits 13:0).
 */
#define	FT_BITS(x, lo, n)	(((x) >> (lo)) & ((1 << (n)) - 1))

#define	OP(x)		((x) >> 30)
#define	OP2(x)		FT_BITS(x, 22, 3)
#define	OP3(x)		FT_BITS(x, 19, 6)
#define	RCOND(x)	FT_BITS(x, 25, 3)
#define	COND(x)		FT_BITS(x, 25, 4)
#define	A(x)		FT_BITS(x, 29, 1)
#define	I(x)		FT_BITS(x, 13, 1)
#define	RD(x)		FT_BITS(x, 25, 5)
#define	RS1(x)		FT_BITS(x, 14, 5)
#define	RS2(x)		FT_BITS(x, 0, 5)
#define	CC(x)		FT_BITS(x, 20, 2)
#define	DISP16(x)	((((x) >> 6) & 0xc000) | FT_BITS(x, 0, 14))
#define	DISP22(x)	FT_BITS(x, 0, 22)
#define	DISP19(x)	FT_BITS(x, 0, 19)
#define	DISP30(x)	FT_BITS(x, 0, 30)
#define	SW_TRAP(x)	FT_BITS(x, 0, 7)

/*
 * Values of the op3 field for the instructions this file cares about.
 */
#define	OP3_OR		0x02
#define	OP3_RD		0x28
#define	OP3_JMPL	0x38
#define	OP3_RETURN	0x39
#define	OP3_TCC		0x3a
#define	OP3_SAVE	0x3c
#define	OP3_RESTORE	0x3d

/*
 * A second set of op3 values. Note that 0x3c and 0x3d also appear in the
 * set above, so the two sets must be distinguished by some other field
 * (presumably the op field -- confirm against the SPARC V9 manual).
 */
#define	OP3_PREFETCH	0x2d
#define	OP3_CASA	0x3c
#define	OP3_PREFETCHA	0x3d
#define	OP3_CASXA	0x3e

/*
 * Values of the op2 field.
 */
#define	OP2_ILLTRAP	0x0
#define	OP2_BPcc	0x1
#define	OP2_Bicc	0x2
#define	OP2_BPr		0x3
#define	OP2_SETHI	0x4
#define	OP2_FBPfcc	0x5
#define	OP2_FBfcc	0x6

/*
 * Register numbers as encoded in the rd/rs1/rs2 fields.
 */
#define	R_G0		0
#define	R_O0		8
#define	R_SP		14
#define	R_I0		24
#define	R_I1		25
#define	R_I2		26
#define	R_I3		27
#define	R_I4		28

/*
 * Byte offsets, relative to %g7, of the helper instructions that live in
 * the user-land thread structure. Check the comment in fasttrap.h when
 * changing these offsets or adding new instructions.
 */
#define	FASTTRAP_OFF_SAVE	64
#define	FASTTRAP_OFF_RESTORE	68
#define	FASTTRAP_OFF_FTRET	72
#define	FASTTRAP_OFF_RETURN	76

#define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */

/*
 * Tunable that lets users switch off the save instruction optimization.
 * A program that is not ABI compliant could be broken by that
 * optimization, so it can be disabled by setting this to zero.
 */
int fasttrap_optimize_save = 1;
198 
199 static uint64_t
200 fasttrap_anarg(struct regs *rp, int argno)
201 {
202 	uint64_t value;
203 
204 	if (argno < 6)
205 		return ((&rp->r_o0)[argno]);
206 
207 	if (curproc->p_model == DATAMODEL_NATIVE) {
208 		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
209 
210 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
211 		value = dtrace_fulword(&fr->fr_argd[argno]);
212 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
213 		    CPU_DTRACE_BADALIGN);
214 	} else {
215 		struct frame32 *fr = (struct frame32 *)rp->r_sp;
216 
217 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
218 		value = dtrace_fuword32(&fr->fr_argd[argno]);
219 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
220 		    CPU_DTRACE_BADALIGN);
221 	}
222 
223 	return (value);
224 }
225 
226 static ulong_t fasttrap_getreg(struct regs *, uint_t);
227 static void fasttrap_putreg(struct regs *, uint_t, ulong_t);
228 
229 static void
230 fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
231     uint_t fake_restore, int argc, uintptr_t *argv)
232 {
233 	int i, x, cap = MIN(argc, probe->ftp_nargs);
234 	int inc = (fake_restore ? 16 : 0);
235 
236 	/*
237 	 * The only way we'll hit the fake_restore case is if a USDT probe is
238 	 * invoked as a tail-call. While it wouldn't be incorrect, we can
239 	 * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
240 	 * directly since a tail-call can't be made if the invoked function
241 	 * would use the argument dump space (i.e. if there were more than
242 	 * 6 arguments). We take this shortcut because unconditionally rooting
243 	 * around for R_FP (R_SP + 16) would be unnecessarily painful.
244 	 */
245 
246 	if (curproc->p_model == DATAMODEL_NATIVE) {
247 		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
248 		uintptr_t v;
249 
250 		for (i = 0; i < cap; i++) {
251 			x = probe->ftp_argmap[i];
252 
253 			if (x < 6)
254 				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
255 			else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
256 				argv[i] = 0;
257 		}
258 
259 	} else {
260 		struct frame32 *fr = (struct frame32 *)rp->r_sp;
261 		uint32_t v;
262 
263 		for (i = 0; i < cap; i++) {
264 			x = probe->ftp_argmap[i];
265 
266 			if (x < 6)
267 				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
268 			else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
269 				argv[i] = 0;
270 		}
271 	}
272 
273 	for (; i < argc; i++) {
274 		argv[i] = 0;
275 	}
276 }
277 
278 static void
279 fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
280     uint_t fake_restore)
281 {
282 	fasttrap_tracepoint_t *tp;
283 	fasttrap_bucket_t *bucket;
284 	fasttrap_id_t *id;
285 	kmutex_t *pid_mtx;
286 	dtrace_icookie_t cookie;
287 
288 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
289 	mutex_enter(pid_mtx);
290 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
291 
292 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
293 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
294 		    tp->ftt_proc->ftpc_acount != 0)
295 			break;
296 	}
297 
298 	/*
299 	 * Don't sweat it if we can't find the tracepoint again; unlike
300 	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
301 	 * is not essential to the correct execution of the process.
302 	 */
303 	if (tp == NULL || tp->ftt_retids == NULL) {
304 		mutex_exit(pid_mtx);
305 		return;
306 	}
307 
308 	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
309 		fasttrap_probe_t *probe = id->fti_probe;
310 
311 		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
312 			if (probe->ftp_argmap != NULL && fake_restore) {
313 				uintptr_t t[5];
314 
315 				fasttrap_usdt_args(probe, rp, fake_restore,
316 				    sizeof (t) / sizeof (t[0]), t);
317 
318 				cookie = dtrace_interrupt_disable();
319 				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
320 				dtrace_probe(probe->ftp_id, t[0], t[1],
321 				    t[2], t[3], t[4]);
322 				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
323 				dtrace_interrupt_enable(cookie);
324 
325 			} else if (probe->ftp_argmap != NULL) {
326 				uintptr_t t[5];
327 
328 				fasttrap_usdt_args(probe, rp, fake_restore,
329 				    sizeof (t) / sizeof (t[0]), t);
330 
331 				dtrace_probe(probe->ftp_id, t[0], t[1],
332 				    t[2], t[3], t[4]);
333 
334 			} else if (fake_restore) {
335 				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
336 				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
337 				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
338 				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
339 				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);
340 
341 				cookie = dtrace_interrupt_disable();
342 				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
343 				dtrace_probe(probe->ftp_id, arg0, arg1,
344 				    arg2, arg3, arg4);
345 				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
346 				dtrace_interrupt_enable(cookie);
347 
348 			} else {
349 				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
350 				    rp->r_o2, rp->r_o3, rp->r_o4);
351 			}
352 
353 			continue;
354 		}
355 
356 		/*
357 		 * If this is only a possible return point, we must
358 		 * be looking at a potential tail call in leaf context.
359 		 * If the %npc is still within this function, then we
360 		 * must have misidentified a jmpl as a tail-call when it
361 		 * is, in fact, part of a jump table. It would be nice to
362 		 * remove this tracepoint, but this is neither the time
363 		 * nor the place.
364 		 */
365 		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
366 		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
367 			continue;
368 
369 		/*
370 		 * It's possible for a function to branch to the delay slot
371 		 * of an instruction that we've identified as a return site.
372 		 * We can dectect this spurious return probe activation by
373 		 * observing that in this case %npc will be %pc + 4 and %npc
374 		 * will be inside the current function (unless the user is
375 		 * doing _crazy_ instruction picking in which case there's
376 		 * very little we can do). The second check is important
377 		 * in case the last instructions of a function make a tail-
378 		 * call to the function located immediately subsequent.
379 		 */
380 		if (rp->r_npc == rp->r_pc + 4 &&
381 		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
382 			continue;
383 
384 		/*
385 		 * The first argument is the offset of return tracepoint
386 		 * in the function; the remaining arguments are the return
387 		 * values.
388 		 *
389 		 * If fake_restore is set, we need to pull the return values
390 		 * out of the %i's rather than the %o's -- a little trickier.
391 		 */
392 		if (!fake_restore) {
393 			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
394 			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
395 		} else {
396 			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
397 			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
398 			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
399 			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
400 
401 			cookie = dtrace_interrupt_disable();
402 			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
403 			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
404 			    arg0, arg1, arg2, arg3);
405 			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
406 			dtrace_interrupt_enable(cookie);
407 		}
408 	}
409 
410 	mutex_exit(pid_mtx);
411 }
412 
413 int
414 fasttrap_pid_probe(struct regs *rp)
415 {
416 	proc_t *p = curproc;
417 	fasttrap_tracepoint_t *tp, tp_local;
418 	fasttrap_id_t *id;
419 	pid_t pid;
420 	uintptr_t pc = rp->r_pc;
421 	uintptr_t npc = rp->r_npc;
422 	uintptr_t orig_pc = pc;
423 	fasttrap_bucket_t *bucket;
424 	kmutex_t *pid_mtx;
425 	uint_t fake_restore = 0, is_enabled = 0;
426 	dtrace_icookie_t cookie;
427 
428 	/*
429 	 * It's possible that a user (in a veritable orgy of bad planning)
430 	 * could redirect this thread's flow of control before it reached the
431 	 * return probe fasttrap. In this case we need to kill the process
432 	 * since it's in a unrecoverable state.
433 	 */
434 	if (curthread->t_dtrace_step) {
435 		ASSERT(curthread->t_dtrace_on);
436 		fasttrap_sigtrap(p, curthread, pc);
437 		return (0);
438 	}
439 
440 	/*
441 	 * Clear all user tracing flags.
442 	 */
443 	curthread->t_dtrace_ft = 0;
444 	curthread->t_dtrace_pc = 0;
445 	curthread->t_dtrace_npc = 0;
446 	curthread->t_dtrace_scrpc = 0;
447 	curthread->t_dtrace_astpc = 0;
448 
449 	/*
450 	 * Treat a child created by a call to vfork(2) as if it were its
451 	 * parent. We know that there's only one thread of control in such a
452 	 * process: this one.
453 	 */
454 	while (p->p_flag & SVFORK) {
455 		p = p->p_parent;
456 	}
457 
458 	pid = p->p_pid;
459 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
460 	mutex_enter(pid_mtx);
461 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
462 
463 	/*
464 	 * Lookup the tracepoint that the process just hit.
465 	 */
466 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
467 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
468 		    tp->ftt_proc->ftpc_acount != 0)
469 			break;
470 	}
471 
472 	/*
473 	 * If we couldn't find a matching tracepoint, either a tracepoint has
474 	 * been inserted without using the pid<pid> ioctl interface (see
475 	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
476 	 */
477 	if (tp == NULL) {
478 		mutex_exit(pid_mtx);
479 		return (-1);
480 	}
481 
482 	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
483 		fasttrap_probe_t *probe = id->fti_probe;
484 		int isentry = (id->fti_ptype == DTFTP_ENTRY);
485 
486 		if (id->fti_ptype == DTFTP_IS_ENABLED) {
487 			is_enabled = 1;
488 			continue;
489 		}
490 
491 		/*
492 		 * We note that this was an entry probe to help ustack() find
493 		 * the first caller.
494 		 */
495 		if (isentry) {
496 			cookie = dtrace_interrupt_disable();
497 			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
498 		}
499 		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
500 		    rp->r_o3, rp->r_o4);
501 		if (isentry) {
502 			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
503 			dtrace_interrupt_enable(cookie);
504 		}
505 	}
506 
507 	/*
508 	 * We're about to do a bunch of work so we cache a local copy of
509 	 * the tracepoint to emulate the instruction, and then find the
510 	 * tracepoint again later if we need to light up any return probes.
511 	 */
512 	tp_local = *tp;
513 	mutex_exit(pid_mtx);
514 	tp = &tp_local;
515 
516 	/*
517 	 * If there's an is-enabled probe conntected to this tracepoint it
518 	 * means that there was a 'mov %g0, %o0' instruction that was placed
519 	 * there by DTrace when the binary was linked. As this probe is, in
520 	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
521 	 * bypass all the instruction emulation logic since we know the
522 	 * inevitable result. It's possible that a user could construct a
523 	 * scenario where the 'is-enabled' probe was on some other
524 	 * instruction, but that would be a rather exotic way to shoot oneself
525 	 * in the foot.
526 	 */
527 	if (is_enabled) {
528 		rp->r_o0 = 1;
529 		pc = rp->r_npc;
530 		npc = pc + 4;
531 		goto done;
532 	}
533 
534 	/*
535 	 * We emulate certain types of instructions to ensure correctness
536 	 * (in the case of position dependent instructions) or optimize
537 	 * common cases. The rest we have the thread execute back in user-
538 	 * land.
539 	 */
540 	switch (tp->ftt_type) {
541 	case FASTTRAP_T_SAVE:
542 	{
543 		int32_t imm;
544 
545 		/*
546 		 * This an optimization to let us handle function entry
547 		 * probes more efficiently. Many functions begin with a save
548 		 * instruction that follows the pattern:
549 		 *	save	%sp, <imm>, %sp
550 		 *
551 		 * Meanwhile, we've stashed the instruction:
552 		 *	save	%g1, %g0, %sp
553 		 *
554 		 * off of %g7, so all we have to do is stick the right value
555 		 * into %g1 and reset %pc to point to the instruction we've
556 		 * cleverly hidden (%npc should not be touched).
557 		 */
558 
559 		imm = tp->ftt_instr << 19;
560 		imm >>= 19;
561 		rp->r_g1 = rp->r_sp + imm;
562 		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
563 		break;
564 	}
565 
566 	case FASTTRAP_T_RESTORE:
567 	{
568 		ulong_t value;
569 		uint_t rd;
570 
571 		/*
572 		 * This is an optimization to let us handle function
573 		 * return probes more efficiently. Most non-leaf functions
574 		 * end with the sequence:
575 		 *	ret
576 		 *	restore	<reg>, <reg_or_imm>, %oX
577 		 *
578 		 * We've stashed the instruction:
579 		 *	restore	%g0, %g0, %g0
580 		 *
581 		 * off of %g7 so we just need to place the correct value
582 		 * in the right %i register (since after our fake-o
583 		 * restore, the %i's will become the %o's) and set the %pc
584 		 * to point to our hidden restore. We also set fake_restore to
585 		 * let fasttrap_return_common() know that it will find the
586 		 * return values in the %i's rather than the %o's.
587 		 */
588 
589 		if (I(tp->ftt_instr)) {
590 			int32_t imm;
591 
592 			imm = tp->ftt_instr << 19;
593 			imm >>= 19;
594 			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
595 		} else {
596 			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
597 			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
598 		}
599 
600 		/*
601 		 * Convert %o's to %i's; leave %g's as they are.
602 		 */
603 		rd = RD(tp->ftt_instr);
604 		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);
605 
606 		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
607 		fake_restore = 1;
608 		break;
609 	}
610 
611 	case FASTTRAP_T_RETURN:
612 	{
613 		uintptr_t target;
614 
615 		/*
616 		 * A return instruction is like a jmpl (without the link
617 		 * part) that executes an implicit restore. We've stashed
618 		 * the instruction:
619 		 *	return %o0
620 		 *
621 		 * off of %g7 so we just need to place the target in %o0
622 		 * and set the %pc to point to the stashed return instruction.
623 		 * We use %o0 since that register disappears after the return
624 		 * executes, erasing any evidence of this tampering.
625 		 */
626 		if (I(tp->ftt_instr)) {
627 			int32_t imm;
628 
629 			imm = tp->ftt_instr << 19;
630 			imm >>= 19;
631 			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
632 		} else {
633 			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
634 			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
635 		}
636 
637 		fasttrap_putreg(rp, R_O0, target);
638 
639 		pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
640 		fake_restore = 1;
641 		break;
642 	}
643 
644 	case FASTTRAP_T_OR:
645 	{
646 		ulong_t value;
647 
648 		if (I(tp->ftt_instr)) {
649 			int32_t imm;
650 
651 			imm = tp->ftt_instr << 19;
652 			imm >>= 19;
653 			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
654 		} else {
655 			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
656 			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
657 		}
658 
659 		fasttrap_putreg(rp, RD(tp->ftt_instr), value);
660 		pc = rp->r_npc;
661 		npc = pc + 4;
662 		break;
663 	}
664 
665 	case FASTTRAP_T_SETHI:
666 		if (RD(tp->ftt_instr) != R_G0) {
667 			uint32_t imm32 = tp->ftt_instr << 10;
668 			fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
669 		}
670 		pc = rp->r_npc;
671 		npc = pc + 4;
672 		break;
673 
674 	case FASTTRAP_T_CCR:
675 	{
676 		uint_t c, v, z, n, taken;
677 		uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;
678 
679 		if (tp->ftt_cc != 0)
680 			ccr >>= 4;
681 
682 		c = (ccr >> 0) & 1;
683 		v = (ccr >> 1) & 1;
684 		z = (ccr >> 2) & 1;
685 		n = (ccr >> 3) & 1;
686 
687 		switch (tp->ftt_code) {
688 		case 0x0:	/* BN */
689 			taken = 0;		break;
690 		case 0x1:	/* BE */
691 			taken = z;		break;
692 		case 0x2:	/* BLE */
693 			taken = z | (n ^ v);	break;
694 		case 0x3:	/* BL */
695 			taken = n ^ v;		break;
696 		case 0x4:	/* BLEU */
697 			taken = c | z;		break;
698 		case 0x5:	/* BCS (BLU) */
699 			taken = c;		break;
700 		case 0x6:	/* BNEG */
701 			taken = n;		break;
702 		case 0x7:	/* BVS */
703 			taken = v;		break;
704 		case 0x8:	/* BA */
705 			/*
706 			 * We handle the BA case differently since the annul
707 			 * bit means something slightly different.
708 			 */
709 			panic("fasttrap: mishandled a branch");
710 			taken = 1;		break;
711 		case 0x9:	/* BNE */
712 			taken = ~z;		break;
713 		case 0xa:	/* BG */
714 			taken = ~(z | (n ^ v));	break;
715 		case 0xb:	/* BGE */
716 			taken = ~(n ^ v);	break;
717 		case 0xc:	/* BGU */
718 			taken = ~(c | z);	break;
719 		case 0xd:	/* BCC (BGEU) */
720 			taken = ~c;		break;
721 		case 0xe:	/* BPOS */
722 			taken = ~n;		break;
723 		case 0xf:	/* BVC */
724 			taken = ~v;		break;
725 		}
726 
727 		if (taken & 1) {
728 			pc = rp->r_npc;
729 			npc = tp->ftt_dest;
730 		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
731 			/*
732 			 * Untaken annulled branches don't execute the
733 			 * instruction in the delay slot.
734 			 */
735 			pc = rp->r_npc + 4;
736 			npc = pc + 4;
737 		} else {
738 			pc = rp->r_npc;
739 			npc = pc + 4;
740 		}
741 		break;
742 	}
743 
744 	case FASTTRAP_T_FCC:
745 	{
746 		uint_t fcc;
747 		uint_t taken;
748 		uint64_t fsr;
749 
750 		dtrace_getfsr(&fsr);
751 
752 		if (tp->ftt_cc == 0) {
753 			fcc = (fsr >> 10) & 0x3;
754 		} else {
755 			uint_t shift;
756 			ASSERT(tp->ftt_cc <= 3);
757 			shift = 30 + tp->ftt_cc * 2;
758 			fcc = (fsr >> shift) & 0x3;
759 		}
760 
761 		switch (tp->ftt_code) {
762 		case 0x0:	/* FBN */
763 			taken = (1 << fcc) & (0|0|0|0);	break;
764 		case 0x1:	/* FBNE */
765 			taken = (1 << fcc) & (8|4|2|0);	break;
766 		case 0x2:	/* FBLG */
767 			taken = (1 << fcc) & (0|4|2|0);	break;
768 		case 0x3:	/* FBUL */
769 			taken = (1 << fcc) & (8|0|2|0);	break;
770 		case 0x4:	/* FBL */
771 			taken = (1 << fcc) & (0|0|2|0);	break;
772 		case 0x5:	/* FBUG */
773 			taken = (1 << fcc) & (8|4|0|0);	break;
774 		case 0x6:	/* FBG */
775 			taken = (1 << fcc) & (0|4|0|0);	break;
776 		case 0x7:	/* FBU */
777 			taken = (1 << fcc) & (8|0|0|0);	break;
778 		case 0x8:	/* FBA */
779 			/*
780 			 * We handle the FBA case differently since the annul
781 			 * bit means something slightly different.
782 			 */
783 			panic("fasttrap: mishandled a branch");
784 			taken = (1 << fcc) & (8|4|2|1);	break;
785 		case 0x9:	/* FBE */
786 			taken = (1 << fcc) & (0|0|0|1);	break;
787 		case 0xa:	/* FBUE */
788 			taken = (1 << fcc) & (8|0|0|1);	break;
789 		case 0xb:	/* FBGE */
790 			taken = (1 << fcc) & (0|4|0|1);	break;
791 		case 0xc:	/* FBUGE */
792 			taken = (1 << fcc) & (8|4|0|1);	break;
793 		case 0xd:	/* FBLE */
794 			taken = (1 << fcc) & (0|0|2|1);	break;
795 		case 0xe:	/* FBULE */
796 			taken = (1 << fcc) & (8|0|2|1);	break;
797 		case 0xf:	/* FBO */
798 			taken = (1 << fcc) & (0|4|2|1);	break;
799 		}
800 
801 		if (taken) {
802 			pc = rp->r_npc;
803 			npc = tp->ftt_dest;
804 		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
805 			/*
806 			 * Untaken annulled branches don't execute the
807 			 * instruction in the delay slot.
808 			 */
809 			pc = rp->r_npc + 4;
810 			npc = pc + 4;
811 		} else {
812 			pc = rp->r_npc;
813 			npc = pc + 4;
814 		}
815 		break;
816 	}
817 
818 	case FASTTRAP_T_REG:
819 	{
820 		int64_t value;
821 		uint_t taken;
822 		uint_t reg = RS1(tp->ftt_instr);
823 
824 		/*
825 		 * An ILP32 process shouldn't be using a branch predicated on
826 		 * an %i or an %l since it would violate the ABI. It's a
827 		 * violation of the ABI because we can't ensure deterministic
828 		 * behavior. We should have identified this case when we
829 		 * enabled the probe.
830 		 */
831 		ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);
832 
833 		value = (int64_t)fasttrap_getreg(rp, reg);
834 
835 		switch (tp->ftt_code) {
836 		case 0x1:	/* BRZ */
837 			taken = (value == 0);	break;
838 		case 0x2:	/* BRLEZ */
839 			taken = (value <= 0);	break;
840 		case 0x3:	/* BRLZ */
841 			taken = (value < 0);	break;
842 		case 0x5:	/* BRNZ */
843 			taken = (value != 0);	break;
844 		case 0x6:	/* BRGZ */
845 			taken = (value > 0);	break;
846 		case 0x7:	/* BRGEZ */
847 			taken = (value >= 0);	break;
848 		default:
849 		case 0x0:
850 		case 0x4:
851 			panic("fasttrap: mishandled a branch");
852 		}
853 
854 		if (taken) {
855 			pc = rp->r_npc;
856 			npc = tp->ftt_dest;
857 		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
858 			/*
859 			 * Untaken annulled branches don't execute the
860 			 * instruction in the delay slot.
861 			 */
862 			pc = rp->r_npc + 4;
863 			npc = pc + 4;
864 		} else {
865 			pc = rp->r_npc;
866 			npc = pc + 4;
867 		}
868 		break;
869 	}
870 
871 	case FASTTRAP_T_ALWAYS:
872 		/*
873 		 * BAs, BA,As...
874 		 */
875 
876 		if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
877 			/*
878 			 * Annulled branch always instructions never execute
879 			 * the instruction in the delay slot.
880 			 */
881 			pc = tp->ftt_dest;
882 			npc = tp->ftt_dest + 4;
883 		} else {
884 			pc = rp->r_npc;
885 			npc = tp->ftt_dest;
886 		}
887 		break;
888 
889 	case FASTTRAP_T_RDPC:
890 		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
891 		pc = rp->r_npc;
892 		npc = pc + 4;
893 		break;
894 
895 	case FASTTRAP_T_CALL:
896 		/*
897 		 * It's a call _and_ link remember...
898 		 */
899 		rp->r_o7 = rp->r_pc;
900 		pc = rp->r_npc;
901 		npc = tp->ftt_dest;
902 		break;
903 
904 	case FASTTRAP_T_JMPL:
905 		pc = rp->r_npc;
906 
907 		if (I(tp->ftt_instr)) {
908 			uint_t rs1 = RS1(tp->ftt_instr);
909 			int32_t imm;
910 
911 			imm = tp->ftt_instr << 19;
912 			imm >>= 19;
913 			npc = fasttrap_getreg(rp, rs1) + imm;
914 		} else {
915 			uint_t rs1 = RS1(tp->ftt_instr);
916 			uint_t rs2 = RS2(tp->ftt_instr);
917 
918 			npc = fasttrap_getreg(rp, rs1) +
919 			    fasttrap_getreg(rp, rs2);
920 		}
921 
922 		/*
923 		 * Do the link part of the jump-and-link instruction.
924 		 */
925 		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
926 
927 		break;
928 
929 	case FASTTRAP_T_COMMON:
930 	{
931 		curthread->t_dtrace_scrpc = rp->r_g7;
932 		curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;
933 
934 		/*
935 		 * Copy the instruction to a reserved location in the
936 		 * user-land thread structure, then set the PC to that
937 		 * location and leave the NPC alone. We take pains to ensure
938 		 * consistency in the instruction stream (See SPARC
939 		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
940 		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
941 		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
942 		 * instruction into the user's address space without
943 		 * bypassing the I$. There's no AS_USER version of this ASI
944 		 * (as exist for other ASIs) so we use the lofault
945 		 * mechanism to catch faults.
946 		 */
947 		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
948 			/*
949 			 * If the copyout fails, then the process's state
950 			 * is not consistent (the effects of the traced
951 			 * instruction will never be seen). This process
952 			 * cannot be allowed to continue execution.
953 			 */
954 			fasttrap_sigtrap(curproc, curthread, pc);
955 			return (0);
956 		}
957 
958 		curthread->t_dtrace_pc = pc;
959 		curthread->t_dtrace_npc = npc;
960 		curthread->t_dtrace_on = 1;
961 
962 		pc = curthread->t_dtrace_scrpc;
963 
964 		if (tp->ftt_retids != NULL) {
965 			curthread->t_dtrace_step = 1;
966 			curthread->t_dtrace_ret = 1;
967 			npc = curthread->t_dtrace_astpc;
968 		}
969 		break;
970 	}
971 
972 	default:
973 		panic("fasttrap: mishandled an instruction");
974 	}
975 
976 	/*
977 	 * This bit me in the ass a couple of times, so lets toss this
978 	 * in as a cursory sanity check.
979 	 */
980 	ASSERT(pc != rp->r_g7 + 4);
981 	ASSERT(pc != rp->r_g7 + 8);
982 
983 done:
984 	/*
985 	 * If there were no return probes when we first found the tracepoint,
986 	 * we should feel no obligation to honor any return probes that were
987 	 * subsequently enabled -- they'll just have to wait until the next
988 	 * time around.
989 	 */
990 	if (tp->ftt_retids != NULL) {
991 		/*
992 		 * We need to wait until the results of the instruction are
993 		 * apparent before invoking any return probes. If this
994 		 * instruction was emulated we can just call
995 		 * fasttrap_return_common(); if it needs to be executed, we
996 		 * need to wait until we return to the kernel.
997 		 */
998 		if (tp->ftt_type != FASTTRAP_T_COMMON) {
999 			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
1000 		} else {
1001 			ASSERT(curthread->t_dtrace_ret != 0);
1002 			ASSERT(curthread->t_dtrace_pc == orig_pc);
1003 			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
1004 			ASSERT(npc == curthread->t_dtrace_astpc);
1005 		}
1006 	}
1007 
1008 	ASSERT(pc != 0);
1009 	rp->r_pc = pc;
1010 	rp->r_npc = npc;
1011 
1012 	return (0);
1013 }
1014 
1015 int
1016 fasttrap_return_probe(struct regs *rp)
1017 {
1018 	proc_t *p = ttoproc(curthread);
1019 	pid_t pid;
1020 	uintptr_t pc = curthread->t_dtrace_pc;
1021 	uintptr_t npc = curthread->t_dtrace_npc;
1022 
1023 	curthread->t_dtrace_pc = 0;
1024 	curthread->t_dtrace_npc = 0;
1025 	curthread->t_dtrace_scrpc = 0;
1026 	curthread->t_dtrace_astpc = 0;
1027 
1028 	/*
1029 	 * Treat a child created by a call to vfork(2) as if it were its
1030 	 * parent. We know there's only one thread of control in such a
1031 	 * process: this one.
1032 	 */
1033 	while (p->p_flag & SVFORK) {
1034 		p = p->p_parent;
1035 	}
1036 
1037 	/*
1038 	 * We set the %pc and %npc to their values when the traced
1039 	 * instruction was initially executed so that it appears to
1040 	 * dtrace_probe() that we're on the original instruction, and so that
1041 	 * the user can't easily detect our complex web of lies.
1042 	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
1043 	 * after we return.
1044 	 */
1045 	rp->r_pc = pc;
1046 	rp->r_npc = npc;
1047 
1048 	pid = p->p_pid;
1049 	fasttrap_return_common(rp, pc, pid, 0);
1050 
1051 	return (0);
1052 }
1053 
1054 int
1055 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
1056 {
1057 	fasttrap_instr_t instr = FASTTRAP_INSTR;
1058 
1059 	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
1060 		return (-1);
1061 
1062 	return (0);
1063 }
1064 
1065 int
1066 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
1067 {
1068 	fasttrap_instr_t instr;
1069 
1070 	/*
1071 	 * Distinguish between read or write failures and a changed
1072 	 * instruction.
1073 	 */
1074 	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
1075 		return (0);
1076 	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
1077 		return (0);
1078 	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
1079 		return (-1);
1080 
1081 	return (0);
1082 }
1083 
1084 int
1085 fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
1086     fasttrap_probe_type_t type)
1087 {
1088 	uint32_t instr;
1089 	int32_t disp;
1090 
1091 	/*
1092 	 * Read the instruction at the given address out of the process's
1093 	 * address space. We don't have to worry about a debugger
1094 	 * changing this instruction before we overwrite it with our trap
1095 	 * instruction since P_PR_LOCK is set.
1096 	 */
1097 	if (uread(p, &instr, 4, pc) != 0)
1098 		return (-1);
1099 
1100 	/*
1101 	 * Decode the instruction to fill in the probe flags. We can have
1102 	 * the process execute most instructions on its own using a pc/npc
1103 	 * trick, but pc-relative control transfer present a problem since
1104 	 * we're relocating the instruction. We emulate these instructions
1105 	 * in the kernel. We assume a default type and over-write that as
1106 	 * needed.
1107 	 *
1108 	 * pc-relative instructions must be emulated for correctness;
1109 	 * other instructions (which represent a large set of commonly traced
1110 	 * instructions) are emulated or otherwise optimized for performance.
1111 	 */
1112 	tp->ftt_type = FASTTRAP_T_COMMON;
1113 	if (OP(instr) == 1) {
1114 		/*
1115 		 * Call instructions.
1116 		 */
1117 		tp->ftt_type = FASTTRAP_T_CALL;
1118 		disp = DISP30(instr) << 2;
1119 		tp->ftt_dest = pc + (intptr_t)disp;
1120 
1121 	} else if (OP(instr) == 0) {
1122 		/*
1123 		 * Branch instructions.
1124 		 *
1125 		 * Unconditional branches need careful attention when they're
1126 		 * annulled: annulled unconditional branches never execute
1127 		 * the instruction in the delay slot.
1128 		 */
1129 		switch (OP2(instr)) {
1130 		case OP2_ILLTRAP:
1131 		case 0x7:
1132 			/*
1133 			 * The compiler may place an illtrap after a call to
1134 			 * a function that returns a structure. In the case of
1135 			 * a returned structure, the compiler places an illtrap
1136 			 * whose const22 field is the size of the returned
1137 			 * structure immediately following the delay slot of
1138 			 * the call. To stay out of the way, we refuse to
1139 			 * place tracepoints on top of illtrap instructions.
1140 			 *
1141 			 * This is one of the dumbest architectural decisions
1142 			 * I've ever had to work around.
1143 			 *
1144 			 * We also identify the only illegal op2 value (See
1145 			 * SPARC Architecture Manual Version 9, E.2 table 31).
1146 			 */
1147 			return (-1);
1148 
1149 		case OP2_BPcc:
1150 			if (COND(instr) == 8) {
1151 				tp->ftt_type = FASTTRAP_T_ALWAYS;
1152 			} else {
1153 				/*
1154 				 * Check for an illegal instruction.
1155 				 */
1156 				if (CC(instr) & 1)
1157 					return (-1);
1158 				tp->ftt_type = FASTTRAP_T_CCR;
1159 				tp->ftt_cc = CC(instr);
1160 				tp->ftt_code = COND(instr);
1161 			}
1162 
1163 			if (A(instr) != 0)
1164 				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1165 
1166 			disp = DISP19(instr);
1167 			disp <<= 13;
1168 			disp >>= 11;
1169 			tp->ftt_dest = pc + (intptr_t)disp;
1170 			break;
1171 
1172 		case OP2_Bicc:
1173 			if (COND(instr) == 8) {
1174 				tp->ftt_type = FASTTRAP_T_ALWAYS;
1175 			} else {
1176 				tp->ftt_type = FASTTRAP_T_CCR;
1177 				tp->ftt_cc = 0;
1178 				tp->ftt_code = COND(instr);
1179 			}
1180 
1181 			if (A(instr) != 0)
1182 				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1183 
1184 			disp = DISP22(instr);
1185 			disp <<= 10;
1186 			disp >>= 8;
1187 			tp->ftt_dest = pc + (intptr_t)disp;
1188 			break;
1189 
1190 		case OP2_BPr:
1191 			/*
1192 			 * Check for an illegal instruction.
1193 			 */
1194 			if ((RCOND(instr) & 3) == 0)
1195 				return (-1);
1196 
1197 			/*
1198 			 * It's a violation of the v8plus ABI to use a
1199 			 * register-predicated branch in a 32-bit app if
1200 			 * the register used is an %l or an %i (%gs and %os
1201 			 * are legit because they're not saved to the stack
1202 			 * in 32-bit words when we take a trap).
1203 			 */
1204 			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
1205 				return (-1);
1206 
1207 			tp->ftt_type = FASTTRAP_T_REG;
1208 			if (A(instr) != 0)
1209 				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1210 			disp = DISP16(instr);
1211 			disp <<= 16;
1212 			disp >>= 14;
1213 			tp->ftt_dest = pc + (intptr_t)disp;
1214 			tp->ftt_code = RCOND(instr);
1215 			break;
1216 
1217 		case OP2_SETHI:
1218 			tp->ftt_type = FASTTRAP_T_SETHI;
1219 			break;
1220 
1221 		case OP2_FBPfcc:
1222 			if (COND(instr) == 8) {
1223 				tp->ftt_type = FASTTRAP_T_ALWAYS;
1224 			} else {
1225 				tp->ftt_type = FASTTRAP_T_FCC;
1226 				tp->ftt_cc = CC(instr);
1227 				tp->ftt_code = COND(instr);
1228 			}
1229 
1230 			if (A(instr) != 0)
1231 				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1232 
1233 			disp = DISP19(instr);
1234 			disp <<= 13;
1235 			disp >>= 11;
1236 			tp->ftt_dest = pc + (intptr_t)disp;
1237 			break;
1238 
1239 		case OP2_FBfcc:
1240 			if (COND(instr) == 8) {
1241 				tp->ftt_type = FASTTRAP_T_ALWAYS;
1242 			} else {
1243 				tp->ftt_type = FASTTRAP_T_FCC;
1244 				tp->ftt_cc = 0;
1245 				tp->ftt_code = COND(instr);
1246 			}
1247 
1248 			if (A(instr) != 0)
1249 				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1250 
1251 			disp = DISP22(instr);
1252 			disp <<= 10;
1253 			disp >>= 8;
1254 			tp->ftt_dest = pc + (intptr_t)disp;
1255 			break;
1256 		}
1257 
1258 	} else if (OP(instr) == 2) {
1259 		switch (OP3(instr)) {
1260 		case OP3_RETURN:
1261 			tp->ftt_type = FASTTRAP_T_RETURN;
1262 			break;
1263 
1264 		case OP3_JMPL:
1265 			tp->ftt_type = FASTTRAP_T_JMPL;
1266 			break;
1267 
1268 		case OP3_RD:
1269 			if (RS1(instr) == 5)
1270 				tp->ftt_type = FASTTRAP_T_RDPC;
1271 			break;
1272 
1273 		case OP3_SAVE:
1274 			/*
1275 			 * We optimize for save instructions at function
1276 			 * entry; see the comment in fasttrap_pid_probe()
1277 			 * (near FASTTRAP_T_SAVE) for details.
1278 			 */
1279 			if (fasttrap_optimize_save != 0 &&
1280 			    type == DTFTP_ENTRY &&
1281 			    I(instr) == 1 && RD(instr) == R_SP)
1282 				tp->ftt_type = FASTTRAP_T_SAVE;
1283 			break;
1284 
1285 		case OP3_RESTORE:
1286 			/*
1287 			 * We optimize restore instructions at function
1288 			 * return; see the comment in fasttrap_pid_probe()
1289 			 * (near FASTTRAP_T_RESTORE) for details.
1290 			 *
1291 			 * rd must be an %o or %g register.
1292 			 */
1293 			if ((RD(instr) & 0x10) == 0)
1294 				tp->ftt_type = FASTTRAP_T_RESTORE;
1295 			break;
1296 
1297 		case OP3_OR:
1298 			/*
1299 			 * A large proportion of instructions in the delay
1300 			 * slot of retl instructions are or's so we emulate
1301 			 * these downstairs as an optimization.
1302 			 */
1303 			tp->ftt_type = FASTTRAP_T_OR;
1304 			break;
1305 
1306 		case OP3_TCC:
1307 			/*
1308 			 * Breakpoint instructions are effectively position-
1309 			 * dependent since the debugger uses the %pc value
1310 			 * to lookup which breakpoint was executed. As a
1311 			 * result, we can't actually instrument breakpoints.
1312 			 */
1313 			if (SW_TRAP(instr) == ST_BREAKPOINT)
1314 				return (-1);
1315 			break;
1316 
1317 		case 0x19:
1318 		case 0x1d:
1319 		case 0x29:
1320 		case 0x33:
1321 		case 0x3f:
1322 			/*
1323 			 * Identify illegal instructions (See SPARC
1324 			 * Architecture Manual Version 9, E.2 table 32).
1325 			 */
1326 			return (-1);
1327 		}
1328 	} else if (OP(instr) == 3) {
1329 		uint32_t op3 = OP3(instr);
1330 
1331 		/*
1332 		 * Identify illegal instructions (See SPARC Architecture
1333 		 * Manual Version 9, E.2 table 33).
1334 		 */
1335 		if ((op3 & 0x28) == 0x28) {
1336 			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
1337 			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
1338 				return (-1);
1339 		} else {
1340 			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
1341 				return (-1);
1342 		}
1343 	}
1344 
1345 	tp->ftt_instr = instr;
1346 
1347 	/*
1348 	 * We don't know how this tracepoint is going to be used, but in case
1349 	 * it's used as part of a function return probe, we need to indicate
1350 	 * whether it's always a return site or only potentially a return
1351 	 * site. If it's part of a return probe, it's always going to be a
1352 	 * return from that function if it's a restore instruction or if
1353 	 * the previous instruction was a return. If we could reliably
1354 	 * distinguish jump tables from return sites, this wouldn't be
1355 	 * necessary.
1356 	 */
1357 	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
1358 	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
1359 	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
1360 		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;
1361 
1362 	return (0);
1363 }
1364 
1365 /*ARGSUSED*/
1366 uint64_t
1367 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1368     int aframes)
1369 {
1370 	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1371 }
1372 
1373 /*ARGSUSED*/
1374 uint64_t
1375 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1376     int aframes)
1377 {
1378 	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1379 }
1380 
1381 static uint64_t fasttrap_getreg_fast_cnt;
1382 static uint64_t fasttrap_getreg_mpcb_cnt;
1383 static uint64_t fasttrap_getreg_slow_cnt;
1384 
1385 static ulong_t
1386 fasttrap_getreg(struct regs *rp, uint_t reg)
1387 {
1388 	ulong_t value;
1389 	dtrace_icookie_t cookie;
1390 	struct machpcb *mpcb;
1391 	extern ulong_t dtrace_getreg_win(uint_t, uint_t);
1392 
1393 	/*
1394 	 * We have the %os and %gs in our struct regs, but if we need to
1395 	 * snag a %l or %i we need to go scrounging around in the process's
1396 	 * address space.
1397 	 */
1398 	if (reg == 0)
1399 		return (0);
1400 
1401 	if (reg < 16)
1402 		return ((&rp->r_g1)[reg - 1]);
1403 
1404 	/*
1405 	 * Before we look at the user's stack, we'll check the register
1406 	 * windows to see if the information we want is in there.
1407 	 */
1408 	cookie = dtrace_interrupt_disable();
1409 	if (dtrace_getotherwin() > 0) {
1410 		value = dtrace_getreg_win(reg, 1);
1411 		dtrace_interrupt_enable(cookie);
1412 
1413 		atomic_add_64(&fasttrap_getreg_fast_cnt, 1);
1414 
1415 		return (value);
1416 	}
1417 	dtrace_interrupt_enable(cookie);
1418 
1419 	/*
1420 	 * First check the machpcb structure to see if we've already read
1421 	 * in the register window we're looking for; if we haven't, (and
1422 	 * we probably haven't) try to copy in the value of the register.
1423 	 */
1424 	/* LINTED - alignment */
1425 	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1426 
1427 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1428 		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1429 
1430 		if (mpcb->mpcb_wbcnt > 0) {
1431 			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
1432 			int i = mpcb->mpcb_wbcnt;
1433 			do {
1434 				i--;
1435 				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1436 					continue;
1437 
1438 				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
1439 				return (rwin[i].rw_local[reg - 16]);
1440 			} while (i > 0);
1441 		}
1442 
1443 		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
1444 			goto err;
1445 	} else {
1446 		struct frame32 *fr =
1447 		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1448 		uint32_t *v32 = (uint32_t *)&value;
1449 
1450 		if (mpcb->mpcb_wbcnt > 0) {
1451 			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
1452 			int i = mpcb->mpcb_wbcnt;
1453 			do {
1454 				i--;
1455 				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1456 					continue;
1457 
1458 				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
1459 				return (rwin[i].rw_local[reg - 16]);
1460 			} while (i > 0);
1461 		}
1462 
1463 		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
1464 			goto err;
1465 
1466 		v32[0] = 0;
1467 	}
1468 
1469 	atomic_add_64(&fasttrap_getreg_slow_cnt, 1);
1470 	return (value);
1471 
1472 err:
1473 	/*
1474 	 * If the copy in failed, the process will be in a irrecoverable
1475 	 * state, and we have no choice but to kill it.
1476 	 */
1477 	psignal(ttoproc(curthread), SIGILL);
1478 	return (0);
1479 }
1480 
1481 static uint64_t fasttrap_putreg_fast_cnt;
1482 static uint64_t fasttrap_putreg_mpcb_cnt;
1483 static uint64_t fasttrap_putreg_slow_cnt;
1484 
1485 static void
1486 fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
1487 {
1488 	dtrace_icookie_t cookie;
1489 	struct machpcb *mpcb;
1490 	extern void dtrace_putreg_win(uint_t, ulong_t);
1491 
1492 	if (reg == 0)
1493 		return;
1494 
1495 	if (reg < 16) {
1496 		(&rp->r_g1)[reg - 1] = value;
1497 		return;
1498 	}
1499 
1500 	/*
1501 	 * If the user process is still using some register windows, we
1502 	 * can just place the value in the correct window.
1503 	 */
1504 	cookie = dtrace_interrupt_disable();
1505 	if (dtrace_getotherwin() > 0) {
1506 		dtrace_putreg_win(reg, value);
1507 		dtrace_interrupt_enable(cookie);
1508 		atomic_add_64(&fasttrap_putreg_fast_cnt, 1);
1509 		return;
1510 	}
1511 	dtrace_interrupt_enable(cookie);
1512 
1513 	/*
1514 	 * First see if there's a copy of the register window in the
1515 	 * machpcb structure that we can modify; if there isn't try to
1516 	 * copy out the value. If that fails, we try to create a new
1517 	 * register window in the machpcb structure. While this isn't
1518 	 * _precisely_ the intended use of the machpcb structure, it
1519 	 * can't cause any problems since we know at this point in the
1520 	 * code that all of the user's data have been flushed out of the
1521 	 * register file (since %otherwin is 0).
1522 	 */
1523 	/* LINTED - alignment */
1524 	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1525 
1526 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1527 		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1528 		/* LINTED - alignment */
1529 		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;
1530 
1531 		if (mpcb->mpcb_wbcnt > 0) {
1532 			int i = mpcb->mpcb_wbcnt;
1533 			do {
1534 				i--;
1535 				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1536 					continue;
1537 
1538 				rwin[i].rw_local[reg - 16] = value;
1539 				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1540 				return;
1541 			} while (i > 0);
1542 		}
1543 
1544 		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
1545 			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1546 			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1547 				goto err;
1548 
1549 			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
1550 			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1551 			mpcb->mpcb_wbcnt++;
1552 			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1553 			return;
1554 		}
1555 	} else {
1556 		struct frame32 *fr =
1557 		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1558 		/* LINTED - alignment */
1559 		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
1560 		uint32_t v32 = (uint32_t)value;
1561 
1562 		if (mpcb->mpcb_wbcnt > 0) {
1563 			int i = mpcb->mpcb_wbcnt;
1564 			do {
1565 				i--;
1566 				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1567 					continue;
1568 
1569 				rwin[i].rw_local[reg - 16] = v32;
1570 				atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1571 				return;
1572 			} while (i > 0);
1573 		}
1574 
1575 		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
1576 			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1577 			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1578 				goto err;
1579 
1580 			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
1581 			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1582 			mpcb->mpcb_wbcnt++;
1583 			atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1);
1584 			return;
1585 		}
1586 	}
1587 
1588 	atomic_add_64(&fasttrap_putreg_slow_cnt, 1);
1589 	return;
1590 
1591 err:
1592 	/*
1593 	 * If we couldn't record this register's value, the process is in an
1594 	 * irrecoverable state and we have no choice but to euthanize it.
1595 	 */
1596 	psignal(ttoproc(curthread), SIGILL);
1597 }
1598