xref: /titanic_41/usr/src/uts/intel/dtrace/fasttrap_isa.c (revision 264a6e7478846334593be7663fb6b1a8f37784a0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/fasttrap_isa.h>
30 #include <sys/fasttrap_impl.h>
31 #include <sys/dtrace.h>
32 #include <sys/dtrace_impl.h>
33 #include <sys/cmn_err.h>
34 #include <sys/regset.h>
35 #include <sys/privregs.h>
36 #include <sys/segments.h>
37 #include <sys/sysmacros.h>
38 #include <sys/trap.h>
39 #include <sys/archsystm.h>
40 
41 /*
42  * Lossless User-Land Tracing on x86
43  * ---------------------------------
44  *
45  * The execution of most instructions is not dependent on the address; for
46  * these instructions it is sufficient to copy them into the user process's
47  * address space and execute them. To effectively single-step an instruction
48  * in user-land, we copy out the following sequence of instructions to scratch
49  * space in the user thread's ulwp_t structure.
50  *
51  * We then set the program counter (%eip or %rip) to point to this scratch
52  * space. Once execution resumes, the original instruction is executed and
53  * then control flow is redirected to what was originally the subsequent
54  * instruction. If the kernel attempts to deliver a signal while single-
55  * stepping, the signal is deferred and the program counter is moved into the
56  * second sequence of instructions. The second sequence ends in a trap into
57  * the kernel where the deferred signal is then properly handled and delivered.
58  *
59  * For instructions whose execution is position dependent, we perform simple
60  * emulation. These instructions are limited to control transfer
61  * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
62  * of %rip-relative addressing that means that almost any instruction can be
63  * position dependent. For all the details on how we emulate generic
64  * instructions, including %rip-relative instructions, see the code in
65  * fasttrap_pid_probe() below where we handle instructions of type
66  * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
67  */
68 
/*
 * ModR/M byte accessors: mod lives in bits 7-6, reg in bits 5-3 and r/m in
 * bits 2-0. FASTTRAP_MODRM() assembles a byte from the three fields.
 */
#define	FASTTRAP_MODRM_MOD(byte)	(((byte) & 0xc0) >> 6)
#define	FASTTRAP_MODRM_REG(byte)	(((byte) & 0x38) >> 3)
#define	FASTTRAP_MODRM_RM(byte)		((byte) & 0x07)
#define	FASTTRAP_MODRM(m, r, b)		((b) | ((r) << 3) | ((m) << 6))

/*
 * SIB byte accessors: scale in bits 7-6, index in bits 5-3, base in bits 2-0.
 */
#define	FASTTRAP_SIB_SCALE(byte)	(((byte) & 0xc0) >> 6)
#define	FASTTRAP_SIB_INDEX(byte)	(((byte) & 0x38) >> 3)
#define	FASTTRAP_SIB_BASE(byte)		((byte) & 0x07)

/*
 * REX prefix accessors: the prefix has the form 0100WRXB, so W, R, X and B
 * are bits 3, 2, 1 and 0 respectively. FASTTRAP_REX() assembles a prefix.
 */
#define	FASTTRAP_REX_W(pfx)		(((pfx) & 0x08) >> 3)
#define	FASTTRAP_REX_R(pfx)		(((pfx) & 0x04) >> 2)
#define	FASTTRAP_REX_X(pfx)		(((pfx) & 0x02) >> 1)
#define	FASTTRAP_REX_B(pfx)		((pfx) & 0x01)
#define	FASTTRAP_REX(w, r, x, b)	\
	((b) | ((x) << 1) | ((r) << 2) | ((w) << 3) | 0x40)
84 
85 /*
86  * Single-byte op-codes.
87  */
88 #define	FASTTRAP_PUSHL_EBP	0x55
89 
90 #define	FASTTRAP_JO		0x70
91 #define	FASTTRAP_JNO		0x71
92 #define	FASTTRAP_JB		0x72
93 #define	FASTTRAP_JAE		0x73
94 #define	FASTTRAP_JE		0x74
95 #define	FASTTRAP_JNE		0x75
96 #define	FASTTRAP_JBE		0x76
97 #define	FASTTRAP_JA		0x77
98 #define	FASTTRAP_JS		0x78
99 #define	FASTTRAP_JNS		0x79
100 #define	FASTTRAP_JP		0x7a
101 #define	FASTTRAP_JNP		0x7b
102 #define	FASTTRAP_JL		0x7c
103 #define	FASTTRAP_JGE		0x7d
104 #define	FASTTRAP_JLE		0x7e
105 #define	FASTTRAP_JG		0x7f
106 
107 #define	FASTTRAP_MOV_EAX	0xb8
108 #define	FASTTRAP_MOV_ECX	0xb9
109 
110 #define	FASTTRAP_RET16		0xc2
111 #define	FASTTRAP_RET		0xc3
112 
113 #define	FASTTRAP_LOOPNZ		0xe0
114 #define	FASTTRAP_LOOPZ		0xe1
115 #define	FASTTRAP_LOOP		0xe2
116 #define	FASTTRAP_JCXZ		0xe3
117 
118 #define	FASTTRAP_CALL		0xe8
119 #define	FASTTRAP_JMP32		0xe9
120 #define	FASTTRAP_JMP8		0xeb
121 
122 #define	FASTTRAP_INT3		0xcc
123 #define	FASTTRAP_INT		0xcd
124 
125 #define	FASTTRAP_2_BYTE_OP	0x0f
126 #define	FASTTRAP_GROUP5_OP	0xff
127 
128 /*
129  * Two-byte op-codes (second byte only).
130  */
131 #define	FASTTRAP_0F_JO		0x80
132 #define	FASTTRAP_0F_JNO		0x81
133 #define	FASTTRAP_0F_JB		0x82
134 #define	FASTTRAP_0F_JAE		0x83
135 #define	FASTTRAP_0F_JE		0x84
136 #define	FASTTRAP_0F_JNE		0x85
137 #define	FASTTRAP_0F_JBE		0x86
138 #define	FASTTRAP_0F_JA		0x87
139 #define	FASTTRAP_0F_JS		0x88
140 #define	FASTTRAP_0F_JNS		0x89
141 #define	FASTTRAP_0F_JP		0x8a
142 #define	FASTTRAP_0F_JNP		0x8b
143 #define	FASTTRAP_0F_JL		0x8c
144 #define	FASTTRAP_0F_JGE		0x8d
145 #define	FASTTRAP_0F_JLE		0x8e
146 #define	FASTTRAP_0F_JG		0x8f
147 
148 #define	FASTTRAP_EFLAGS_OF	0x800
149 #define	FASTTRAP_EFLAGS_DF	0x400
150 #define	FASTTRAP_EFLAGS_SF	0x080
151 #define	FASTTRAP_EFLAGS_ZF	0x040
152 #define	FASTTRAP_EFLAGS_AF	0x010
153 #define	FASTTRAP_EFLAGS_PF	0x004
154 #define	FASTTRAP_EFLAGS_CF	0x001
155 
156 /*
157  * Instruction prefixes.
158  */
159 #define	FASTTRAP_PREFIX_OPERAND	0x66
160 #define	FASTTRAP_PREFIX_ADDRESS	0x67
161 #define	FASTTRAP_PREFIX_CS	0x2E
162 #define	FASTTRAP_PREFIX_DS	0x3E
163 #define	FASTTRAP_PREFIX_ES	0x26
164 #define	FASTTRAP_PREFIX_FS	0x64
165 #define	FASTTRAP_PREFIX_GS	0x65
166 #define	FASTTRAP_PREFIX_SS	0x36
167 #define	FASTTRAP_PREFIX_LOCK	0xF0
168 #define	FASTTRAP_PREFIX_REP	0xF3
169 #define	FASTTRAP_PREFIX_REPNE	0xF2
170 
171 #define	FASTTRAP_NOREG	0xff
172 
173 /*
174  * Map between instruction register encodings and the kernel constants which
175  * correspond to indices into struct regs.
176  */
177 #ifdef __amd64
178 static const uint8_t regmap[16] = {
179 	REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
180 	REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
181 };
182 #else
183 static const uint8_t regmap[8] = {
184 	EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI
185 };
186 #endif
187 
188 static ulong_t fasttrap_getreg(struct regs *, uint_t);
189 
190 static uint64_t
191 fasttrap_anarg(struct regs *rp, int function_entry, int argno)
192 {
193 	uint64_t value;
194 	int shift = function_entry ? 1 : 0;
195 
196 #ifdef __amd64
197 	if (curproc->p_model == DATAMODEL_LP64) {
198 		uintptr_t *stack;
199 
200 		/*
201 		 * In 64-bit mode, the first six arguments are stored in
202 		 * registers.
203 		 */
204 		if (argno < 6)
205 			return ((&rp->r_rdi)[argno]);
206 
207 		stack = (uintptr_t *)rp->r_sp;
208 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
209 		value = dtrace_fulword(&stack[argno - 6 + shift]);
210 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
211 	} else {
212 #endif
213 		uint32_t *stack = (uint32_t *)rp->r_sp;
214 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
215 		value = dtrace_fuword32(&stack[argno + shift]);
216 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
217 #ifdef __amd64
218 	}
219 #endif
220 
221 	return (value);
222 }
223 
224 /*ARGSUSED*/
225 int
226 fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
227     fasttrap_probe_type_t type)
228 {
229 	uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10];
230 	size_t len = FASTTRAP_MAX_INSTR_SIZE;
231 	size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET));
232 	uint_t start = 0;
233 	int rmindex;
234 	uint8_t seg, rex = 0;
235 
236 	/*
237 	 * Read the instruction at the given address out of the process's
238 	 * address space. We don't have to worry about a debugger
239 	 * changing this instruction before we overwrite it with our trap
240 	 * instruction since P_PR_LOCK is set. Since instructions can span
241 	 * pages, we potentially read the instruction in two parts. If the
242 	 * second part fails, we just zero out that part of the instruction.
243 	 */
244 	if (uread(p, &instr[0], first, pc) != 0)
245 		return (-1);
246 	if (len > first &&
247 	    uread(p, &instr[first], len - first, pc + first) != 0) {
248 		bzero(&instr[first], len - first);
249 		len = first;
250 	}
251 
252 	/*
253 	 * If the disassembly fails, then we have a malformed instruction.
254 	 */
255 	if ((tp->ftt_size = dtrace_instr_size_isa(instr, p->p_model,
256 	    &rmindex)) <= 0)
257 		return (-1);
258 
259 	/*
260 	 * Make sure the disassembler isn't completely broken.
261 	 */
262 	ASSERT(-1 <= rmindex && rmindex < tp->ftt_size);
263 
264 	/*
265 	 * If the computed size is greater than the number of bytes read,
266 	 * then it was a malformed instruction possibly because it fell on a
267 	 * page boundary and the subsequent page was missing or because of
268 	 * some malicious user.
269 	 */
270 	if (tp->ftt_size > len)
271 		return (-1);
272 
273 	tp->ftt_segment = FASTTRAP_SEG_NONE;
274 
275 	/*
276 	 * Find the start of the instruction's opcode by processing any
277 	 * legacy prefixes.
278 	 */
279 	for (;;) {
280 		seg = 0;
281 		switch (instr[start]) {
282 		case FASTTRAP_PREFIX_SS:
283 			seg++;
284 			/*FALLTHRU*/
285 		case FASTTRAP_PREFIX_GS:
286 			seg++;
287 			/*FALLTHRU*/
288 		case FASTTRAP_PREFIX_FS:
289 			seg++;
290 			/*FALLTHRU*/
291 		case FASTTRAP_PREFIX_ES:
292 			seg++;
293 			/*FALLTHRU*/
294 		case FASTTRAP_PREFIX_DS:
295 			seg++;
296 			/*FALLTHRU*/
297 		case FASTTRAP_PREFIX_CS:
298 			seg++;
299 			/*FALLTHRU*/
300 		case FASTTRAP_PREFIX_OPERAND:
301 		case FASTTRAP_PREFIX_ADDRESS:
302 		case FASTTRAP_PREFIX_LOCK:
303 		case FASTTRAP_PREFIX_REP:
304 		case FASTTRAP_PREFIX_REPNE:
305 			if (seg != 0) {
306 				/*
307 				 * It's illegal for an instruction to specify
308 				 * two segment prefixes -- give up on this
309 				 * illegal instruction.
310 				 */
311 				if (tp->ftt_segment != FASTTRAP_SEG_NONE)
312 					return (-1);
313 
314 				tp->ftt_segment = seg;
315 			}
316 			start++;
317 			continue;
318 		}
319 		break;
320 	}
321 
322 #ifdef __amd64
323 	/*
324 	 * Identify the REX prefix on 64-bit processes.
325 	 */
326 	if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40)
327 		rex = instr[start++];
328 #endif
329 
330 	/*
331 	 * Now that we're pretty sure that the instruction is okay, copy the
332 	 * valid part to the tracepoint.
333 	 */
334 	bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE);
335 
336 	tp->ftt_type = FASTTRAP_T_COMMON;
337 	if (instr[start] == FASTTRAP_2_BYTE_OP) {
338 		switch (instr[start + 1]) {
339 		case FASTTRAP_0F_JO:
340 		case FASTTRAP_0F_JNO:
341 		case FASTTRAP_0F_JB:
342 		case FASTTRAP_0F_JAE:
343 		case FASTTRAP_0F_JE:
344 		case FASTTRAP_0F_JNE:
345 		case FASTTRAP_0F_JBE:
346 		case FASTTRAP_0F_JA:
347 		case FASTTRAP_0F_JS:
348 		case FASTTRAP_0F_JNS:
349 		case FASTTRAP_0F_JP:
350 		case FASTTRAP_0F_JNP:
351 		case FASTTRAP_0F_JL:
352 		case FASTTRAP_0F_JGE:
353 		case FASTTRAP_0F_JLE:
354 		case FASTTRAP_0F_JG:
355 			tp->ftt_type = FASTTRAP_T_JCC;
356 			tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO;
357 			tp->ftt_dest = pc + tp->ftt_size +
358 			    *(int32_t *)&instr[start + 2];
359 			break;
360 		}
361 	} else if (instr[start] == FASTTRAP_GROUP5_OP) {
362 		uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]);
363 		uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]);
364 		uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]);
365 
366 		if (reg == 2 || reg == 4) {
367 			uint_t i, sz;
368 
369 			if (reg == 2)
370 				tp->ftt_type = FASTTRAP_T_CALL;
371 			else
372 				tp->ftt_type = FASTTRAP_T_JMP;
373 
374 			if (mod == 3)
375 				tp->ftt_code = 2;
376 			else
377 				tp->ftt_code = 1;
378 
379 			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);
380 
381 			/*
382 			 * See AMD x86-64 Architecture Programmer's Manual
383 			 * Volume 3, Section 1.2.7, Table 1-12, and
384 			 * Appendix A.3.1, Table A-15.
385 			 */
386 			if (mod != 3 && rm == 4) {
387 				uint8_t sib = instr[start + 2];
388 				uint_t index = FASTTRAP_SIB_INDEX(sib);
389 				uint_t base = FASTTRAP_SIB_BASE(sib);
390 
391 				tp->ftt_scale = FASTTRAP_SIB_SCALE(sib);
392 
393 				tp->ftt_index = (index == 4) ?
394 				    FASTTRAP_NOREG :
395 				    regmap[index | (FASTTRAP_REX_X(rex) << 3)];
396 				tp->ftt_base = (mod == 0 && base == 5) ?
397 				    FASTTRAP_NOREG :
398 				    regmap[base | (FASTTRAP_REX_B(rex) << 3)];
399 
400 				i = 3;
401 				sz = mod == 1 ? 1 : 4;
402 			} else {
403 				/*
404 				 * In 64-bit mode, mod == 0 and r/m == 5
405 				 * denotes %rip-relative addressing; in 32-bit
406 				 * mode, the base register isn't used. In both
407 				 * modes, there is a 32-bit operand.
408 				 */
409 				if (mod == 0 && rm == 5) {
410 #ifdef __amd64
411 					if (p->p_model == DATAMODEL_LP64)
412 						tp->ftt_base = REG_RIP;
413 					else
414 #endif
415 						tp->ftt_base = FASTTRAP_NOREG;
416 					sz = 4;
417 				} else  {
418 					uint8_t base = rm |
419 					    (FASTTRAP_REX_B(rex) << 3);
420 
421 					tp->ftt_base = regmap[base];
422 					sz = mod == 1 ? 1 : mod == 2 ? 4 : 0;
423 				}
424 				tp->ftt_index = FASTTRAP_NOREG;
425 				i = 2;
426 			}
427 
428 			if (sz == 1)
429 				tp->ftt_dest = *(int8_t *)&instr[start + i];
430 			else if (sz == 4)
431 				tp->ftt_dest = *(int32_t *)&instr[start + i];
432 			else
433 				tp->ftt_dest = 0;
434 		}
435 	} else {
436 		switch (instr[start]) {
437 		case FASTTRAP_RET:
438 			tp->ftt_type = FASTTRAP_T_RET;
439 			break;
440 
441 		case FASTTRAP_RET16:
442 			tp->ftt_type = FASTTRAP_T_RET16;
443 			tp->ftt_dest = *(uint16_t *)&instr[start + 1];
444 			break;
445 
446 		case FASTTRAP_JO:
447 		case FASTTRAP_JNO:
448 		case FASTTRAP_JB:
449 		case FASTTRAP_JAE:
450 		case FASTTRAP_JE:
451 		case FASTTRAP_JNE:
452 		case FASTTRAP_JBE:
453 		case FASTTRAP_JA:
454 		case FASTTRAP_JS:
455 		case FASTTRAP_JNS:
456 		case FASTTRAP_JP:
457 		case FASTTRAP_JNP:
458 		case FASTTRAP_JL:
459 		case FASTTRAP_JGE:
460 		case FASTTRAP_JLE:
461 		case FASTTRAP_JG:
462 			tp->ftt_type = FASTTRAP_T_JCC;
463 			tp->ftt_code = instr[start];
464 			tp->ftt_dest = pc + tp->ftt_size +
465 			    (int8_t)instr[start + 1];
466 			break;
467 
468 		case FASTTRAP_LOOPNZ:
469 		case FASTTRAP_LOOPZ:
470 		case FASTTRAP_LOOP:
471 			tp->ftt_type = FASTTRAP_T_LOOP;
472 			tp->ftt_code = instr[start];
473 			tp->ftt_dest = pc + tp->ftt_size +
474 			    (int8_t)instr[start + 1];
475 			break;
476 
477 		case FASTTRAP_JCXZ:
478 			tp->ftt_type = FASTTRAP_T_JCXZ;
479 			tp->ftt_dest = pc + tp->ftt_size +
480 			    (int8_t)instr[start + 1];
481 			break;
482 
483 		case FASTTRAP_CALL:
484 			tp->ftt_type = FASTTRAP_T_CALL;
485 			tp->ftt_dest = pc + tp->ftt_size +
486 			    *(int32_t *)&instr[start + 1];
487 			tp->ftt_code = 0;
488 			break;
489 
490 		case FASTTRAP_JMP32:
491 			tp->ftt_type = FASTTRAP_T_JMP;
492 			tp->ftt_dest = pc + tp->ftt_size +
493 			    *(int32_t *)&instr[start + 1];
494 			break;
495 		case FASTTRAP_JMP8:
496 			tp->ftt_type = FASTTRAP_T_JMP;
497 			tp->ftt_dest = pc + tp->ftt_size +
498 			    (int8_t)instr[start + 1];
499 			break;
500 
501 		case FASTTRAP_PUSHL_EBP:
502 			if (start == 0)
503 				tp->ftt_type = FASTTRAP_T_PUSHL_EBP;
504 			break;
505 
506 		case FASTTRAP_INT3:
507 			/*
508 			 * The pid provider shares the int3 trap with debugger
509 			 * breakpoints so we can't instrument them.
510 			 */
511 			ASSERT(instr[start] == FASTTRAP_INSTR);
512 			return (-1);
513 
514 		case FASTTRAP_INT:
515 			/*
516 			 * Interrupts seem like they could be traced with
517 			 * no negative implications, but it's possible that
518 			 * a thread could be redirected by the trap handling
519 			 * code which would eventually return to the
520 			 * instruction after the interrupt. If the interrupt
521 			 * were in our scratch space, the subsequent
522 			 * instruction might be overwritten before we return.
523 			 * Accordingly we refuse to instrument any interrupt.
524 			 */
525 			return (-1);
526 		}
527 	}
528 
529 #ifdef __amd64
530 	if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) {
531 		/*
532 		 * If the process is 64-bit and the instruction type is still
533 		 * FASTTRAP_T_COMMON -- meaning we're going to copy it out an
534 		 * execute it -- we need to watch for %rip-relative
535 		 * addressing mode. See the portion of fasttrap_pid_probe()
536 		 * below where we handle tracepoints with type
537 		 * FASTTRAP_T_COMMON for how we emulate instructions that
538 		 * employ %rip-relative addressing.
539 		 */
540 		if (rmindex != -1) {
541 			uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]);
542 			uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]);
543 			uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]);
544 
545 			ASSERT(rmindex > start);
546 
547 			if (mod == 0 && rm == 5) {
548 				/*
549 				 * We need to be sure to avoid other
550 				 * registers used by this instruction. While
551 				 * the reg field may determine the op code
552 				 * rather than denoting a register, assuming
553 				 * that it denotes a register is always safe.
554 				 * We leave the REX field intact and use
555 				 * whatever value's there for simplicity.
556 				 */
557 				if (reg != 0) {
558 					tp->ftt_ripmode = FASTTRAP_RIP_1 |
559 					    (FASTTRAP_RIP_X *
560 					    FASTTRAP_REX_B(rex));
561 					rm = 0;
562 				} else {
563 					tp->ftt_ripmode = FASTTRAP_RIP_2 |
564 					    (FASTTRAP_RIP_X *
565 					    FASTTRAP_REX_B(rex));
566 					rm = 1;
567 				}
568 
569 				tp->ftt_modrm = tp->ftt_instr[rmindex];
570 				tp->ftt_instr[rmindex] =
571 				    FASTTRAP_MODRM(2, reg, rm);
572 			}
573 		}
574 	}
575 #endif
576 
577 	return (0);
578 }
579 
580 int
581 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
582 {
583 	fasttrap_instr_t instr = FASTTRAP_INSTR;
584 
585 	if (uwrite(p, &instr, 1, tp->ftt_pc) != 0)
586 		return (-1);
587 
588 	return (0);
589 }
590 
591 int
592 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
593 {
594 	uint8_t instr;
595 
596 	/*
597 	 * Distinguish between read or write failures and a changed
598 	 * instruction.
599 	 */
600 	if (uread(p, &instr, 1, tp->ftt_pc) != 0)
601 		return (0);
602 	if (instr != FASTTRAP_INSTR)
603 		return (0);
604 	if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0)
605 		return (-1);
606 
607 	return (0);
608 }
609 
/*
 * Wrapper around fasttrap_fulword() that reports no error: returns the word
 * fetched from uaddr, or 0 if the fetch fails.
 */
static uintptr_t
fasttrap_fulword_noerr(const void *uaddr)
{
	uintptr_t word;

	if (fasttrap_fulword(uaddr, &word) != 0)
		return (0);

	return (word);
}
620 
/*
 * Wrapper around fasttrap_fuword32() that reports no error: returns the
 * 32-bit word fetched from uaddr, or 0 if the fetch fails.
 */
static uint32_t
fasttrap_fuword32_noerr(const void *uaddr)
{
	uint32_t word;

	if (fasttrap_fuword32(uaddr, &word) != 0)
		return (0);

	return (word);
}
631 
632 static void
633 fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
634     uintptr_t new_pc)
635 {
636 	fasttrap_tracepoint_t *tp;
637 	fasttrap_bucket_t *bucket;
638 	fasttrap_id_t *id;
639 	kmutex_t *pid_mtx;
640 
641 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
642 	mutex_enter(pid_mtx);
643 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
644 
645 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
646 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
647 		    !tp->ftt_proc->ftpc_defunct)
648 			break;
649 	}
650 
651 	/*
652 	 * Don't sweat it if we can't find the tracepoint again; unlike
653 	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
654 	 * is not essential to the correct execution of the process.
655 	 */
656 	if (tp == NULL) {
657 		mutex_exit(pid_mtx);
658 		return;
659 	}
660 
661 	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
662 		/*
663 		 * If there's a branch that could act as a return site, we
664 		 * need to trace it, and check here if the program counter is
665 		 * external to the function.
666 		 */
667 		if (tp->ftt_type != FASTTRAP_T_RET &&
668 		    tp->ftt_type != FASTTRAP_T_RET16 &&
669 		    new_pc - id->fti_probe->ftp_faddr <
670 		    id->fti_probe->ftp_fsize)
671 			continue;
672 
673 		dtrace_probe(id->fti_probe->ftp_id,
674 		    pc - id->fti_probe->ftp_faddr,
675 		    rp->r_r0, rp->r_r1, 0, 0);
676 	}
677 
678 	mutex_exit(pid_mtx);
679 }
680 
681 static void
682 fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr)
683 {
684 	sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
685 
686 	sqp->sq_info.si_signo = SIGSEGV;
687 	sqp->sq_info.si_code = SEGV_MAPERR;
688 	sqp->sq_info.si_addr = (caddr_t)addr;
689 
690 	mutex_enter(&p->p_lock);
691 	sigaddqa(p, t, sqp);
692 	mutex_exit(&p->p_lock);
693 
694 	if (t != NULL)
695 		aston(t);
696 }
697 
698 #ifdef __amd64
699 static void
700 fasttrap_usdt_args64(fasttrap_probe_t *probe, struct regs *rp, int argc,
701     uintptr_t *argv)
702 {
703 	int i, x, cap = MIN(argc, probe->ftp_nargs);
704 	uintptr_t *stack = (uintptr_t *)rp->r_sp;
705 
706 	for (i = 0; i < cap; i++) {
707 		x = probe->ftp_argmap[i];
708 
709 		if (x < 6)
710 			argv[i] = (&rp->r_rdi)[x];
711 		else
712 			argv[i] = fasttrap_fulword_noerr(&stack[x]);
713 	}
714 
715 	for (; i < argc; i++) {
716 		argv[i] = 0;
717 	}
718 }
719 #endif
720 
721 static void
722 fasttrap_usdt_args32(fasttrap_probe_t *probe, struct regs *rp, int argc,
723     uint32_t *argv)
724 {
725 	int i, x, cap = MIN(argc, probe->ftp_nargs);
726 	uint32_t *stack = (uint32_t *)rp->r_sp;
727 
728 	for (i = 0; i < cap; i++) {
729 		x = probe->ftp_argmap[i];
730 
731 		argv[i] = fasttrap_fuword32_noerr(&stack[x]);
732 	}
733 
734 	for (; i < argc; i++) {
735 		argv[i] = 0;
736 	}
737 }
738 
739 static int
740 fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct regs *rp, uintptr_t *addr)
741 {
742 	proc_t *p = curproc;
743 	user_desc_t *desc;
744 	uint16_t sel, ndx, type;
745 	uintptr_t limit;
746 
747 	switch (tp->ftt_segment) {
748 	case FASTTRAP_SEG_CS:
749 		sel = rp->r_cs;
750 		break;
751 	case FASTTRAP_SEG_DS:
752 		sel = rp->r_ds;
753 		break;
754 	case FASTTRAP_SEG_ES:
755 		sel = rp->r_es;
756 		break;
757 	case FASTTRAP_SEG_FS:
758 		sel = rp->r_fs;
759 		break;
760 	case FASTTRAP_SEG_GS:
761 		sel = rp->r_gs;
762 		break;
763 	case FASTTRAP_SEG_SS:
764 		sel = rp->r_ss;
765 		break;
766 	}
767 
768 	/*
769 	 * Make sure the given segment register specifies a user priority
770 	 * selector rather than a kernel selector.
771 	 */
772 	if (!SELISUPL(sel))
773 		return (-1);
774 
775 	ndx = SELTOIDX(sel);
776 
777 	/*
778 	 * Check the bounds and grab the descriptor out of the specified
779 	 * descriptor table.
780 	 */
781 	if (SELISLDT(sel)) {
782 		if (ndx > p->p_ldtlimit)
783 			return (-1);
784 
785 		desc = p->p_ldt + ndx;
786 
787 	} else {
788 		if (ndx >= NGDT)
789 			return (-1);
790 
791 		desc = cpu_get_gdt() + ndx;
792 	}
793 
794 	/*
795 	 * The descriptor must have user privilege level and it must be
796 	 * present in memory.
797 	 */
798 	if (desc->usd_dpl != SEL_UPL || desc->usd_p != 1)
799 		return (-1);
800 
801 	type = desc->usd_type;
802 
803 	/*
804 	 * If the S bit in the type field is not set, this descriptor can
805 	 * only be used in system context.
806 	 */
807 	if ((type & 0x10) != 0x10)
808 		return (-1);
809 
810 	limit = USEGD_GETLIMIT(desc) * (desc->usd_gran ? PAGESIZE : 1);
811 
812 	if (tp->ftt_segment == FASTTRAP_SEG_CS) {
813 		/*
814 		 * The code/data bit and readable bit must both be set.
815 		 */
816 		if ((type & 0xa) != 0xa)
817 			return (-1);
818 
819 		if (*addr > limit)
820 			return (-1);
821 	} else {
822 		/*
823 		 * The code/data bit must be clear.
824 		 */
825 		if ((type & 0x8) != 0)
826 			return (-1);
827 
828 		/*
829 		 * If the expand-down bit is clear, we just check the limit as
830 		 * it would naturally be applied. Otherwise, we need to check
831 		 * that the address is the range [limit + 1 .. 0xffff] or
832 		 * [limit + 1 ... 0xffffffff] depending on if the default
833 		 * operand size bit is set.
834 		 */
835 		if ((type & 0x4) == 0) {
836 			if (*addr > limit)
837 				return (-1);
838 		} else if (desc->usd_def32) {
839 			if (*addr < limit + 1 || 0xffff < *addr)
840 				return (-1);
841 		} else {
842 			if (*addr < limit + 1 || 0xffffffff < *addr)
843 				return (-1);
844 		}
845 	}
846 
847 	*addr += USEGD_GETBASE(desc);
848 
849 	return (0);
850 }
851 
852 int
853 fasttrap_pid_probe(struct regs *rp)
854 {
855 	proc_t *p = curproc;
856 	uintptr_t pc = rp->r_pc - 1, new_pc = 0;
857 	fasttrap_bucket_t *bucket;
858 	kmutex_t *pid_mtx;
859 	fasttrap_tracepoint_t *tp, tp_local;
860 	pid_t pid;
861 	dtrace_icookie_t cookie;
862 	uint_t is_enabled = 0;
863 
864 	/*
865 	 * It's possible that a user (in a veritable orgy of bad planning)
866 	 * could redirect this thread's flow of control before it reached the
867 	 * return probe fasttrap. In this case we need to kill the process
868 	 * since it's in a unrecoverable state.
869 	 */
870 	if (curthread->t_dtrace_step) {
871 		ASSERT(curthread->t_dtrace_on);
872 		fasttrap_sigtrap(p, curthread, pc);
873 		return (0);
874 	}
875 
876 	/*
877 	 * Clear all user tracing flags.
878 	 */
879 	curthread->t_dtrace_ft = 0;
880 	curthread->t_dtrace_pc = 0;
881 	curthread->t_dtrace_npc = 0;
882 	curthread->t_dtrace_scrpc = 0;
883 	curthread->t_dtrace_astpc = 0;
884 #ifdef __amd64
885 	curthread->t_dtrace_regv = 0;
886 #endif
887 
888 	/*
889 	 * Treat a child created by a call to vfork(2) as if it were its
890 	 * parent. We know that there's only one thread of control in such a
891 	 * process: this one.
892 	 */
893 	while (p->p_flag & SVFORK) {
894 		p = p->p_parent;
895 	}
896 
897 	pid = p->p_pid;
898 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
899 	mutex_enter(pid_mtx);
900 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
901 
902 	/*
903 	 * Lookup the tracepoint that the process just hit.
904 	 */
905 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
906 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
907 		    !tp->ftt_proc->ftpc_defunct)
908 			break;
909 	}
910 
911 	/*
912 	 * If we couldn't find a matching tracepoint, either a tracepoint has
913 	 * been inserted without using the pid<pid> ioctl interface (see
914 	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
915 	 */
916 	if (tp == NULL) {
917 		mutex_exit(pid_mtx);
918 		return (-1);
919 	}
920 
921 	/*
922 	 * Set the program counter to the address of the traced instruction
923 	 * so that it looks right in ustack() output.
924 	 */
925 	rp->r_pc = pc;
926 
927 	if (tp->ftt_ids != NULL) {
928 		fasttrap_id_t *id;
929 
930 #ifdef __amd64
931 		if (p->p_model == DATAMODEL_LP64) {
932 			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
933 				fasttrap_probe_t *probe = id->fti_probe;
934 
935 				if (id->fti_ptype == DTFTP_ENTRY) {
936 					/*
937 					 * We note that this was an entry
938 					 * probe to help ustack() find the
939 					 * first caller.
940 					 */
941 					cookie = dtrace_interrupt_disable();
942 					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
943 					dtrace_probe(probe->ftp_id, rp->r_rdi,
944 					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
945 					    rp->r_r8);
946 					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
947 					dtrace_interrupt_enable(cookie);
948 				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
949 					/*
950 					 * Note that in this case, we don't
951 					 * call dtrace_probe() since it's only
952 					 * an artificial probe meant to change
953 					 * the flow of control so that it
954 					 * encounters the true probe.
955 					 */
956 					is_enabled = 1;
957 				} else if (probe->ftp_argmap == NULL) {
958 					dtrace_probe(probe->ftp_id, rp->r_rdi,
959 					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
960 					    rp->r_r8);
961 				} else {
962 					uintptr_t t[5];
963 
964 					fasttrap_usdt_args64(probe, rp,
965 					    sizeof (t) / sizeof (t[0]), t);
966 
967 					dtrace_probe(probe->ftp_id, t[0], t[1],
968 					    t[2], t[3], t[4]);
969 				}
970 			}
971 		} else {
972 #endif
973 			uintptr_t s0, s1, s2, s3, s4, s5;
974 			uint32_t *stack = (uint32_t *)rp->r_sp;
975 
976 			/*
977 			 * In 32-bit mode, all arguments are passed on the
978 			 * stack. If this is a function entry probe, we need
979 			 * to skip the first entry on the stack as it
980 			 * represents the return address rather than a
981 			 * parameter to the function.
982 			 */
983 			s0 = fasttrap_fuword32_noerr(&stack[0]);
984 			s1 = fasttrap_fuword32_noerr(&stack[1]);
985 			s2 = fasttrap_fuword32_noerr(&stack[2]);
986 			s3 = fasttrap_fuword32_noerr(&stack[3]);
987 			s4 = fasttrap_fuword32_noerr(&stack[4]);
988 			s5 = fasttrap_fuword32_noerr(&stack[5]);
989 
990 			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
991 				fasttrap_probe_t *probe = id->fti_probe;
992 
993 				if (id->fti_ptype == DTFTP_ENTRY) {
994 					/*
995 					 * We note that this was an entry
996 					 * probe to help ustack() find the
997 					 * first caller.
998 					 */
999 					cookie = dtrace_interrupt_disable();
1000 					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
1001 					dtrace_probe(probe->ftp_id, s1, s2,
1002 					    s3, s4, s5);
1003 					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
1004 					dtrace_interrupt_enable(cookie);
1005 				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
1006 					/*
1007 					 * Note that in this case, we don't
1008 					 * call dtrace_probe() since it's only
1009 					 * an artificial probe meant to change
1010 					 * the flow of control so that it
1011 					 * encounters the true probe.
1012 					 */
1013 					is_enabled = 1;
1014 				} else if (probe->ftp_argmap == NULL) {
1015 					dtrace_probe(probe->ftp_id, s0, s1,
1016 					    s2, s3, s4);
1017 				} else {
1018 					uint32_t t[5];
1019 
1020 					fasttrap_usdt_args32(probe, rp,
1021 					    sizeof (t) / sizeof (t[0]), t);
1022 
1023 					dtrace_probe(probe->ftp_id, t[0], t[1],
1024 					    t[2], t[3], t[4]);
1025 				}
1026 			}
1027 #ifdef __amd64
1028 		}
1029 #endif
1030 	}
1031 
1032 	/*
1033 	 * We're about to do a bunch of work so we cache a local copy of
1034 	 * the tracepoint to emulate the instruction, and then find the
1035 	 * tracepoint again later if we need to light up any return probes.
1036 	 */
1037 	tp_local = *tp;
1038 	mutex_exit(pid_mtx);
1039 	tp = &tp_local;
1040 
1041 	/*
1042 	 * Set the program counter to appear as though the traced instruction
1043 	 * had completely executed. This ensures that fasttrap_getreg() will
1044 	 * report the expected value for REG_RIP.
1045 	 */
1046 	rp->r_pc = pc + tp->ftt_size;
1047 
1048 	/*
1049 	 * If there's an is-enabled probe connected to this tracepoint it
1050 	 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
1051 	 * instruction that was placed there by DTrace when the binary was
1052 	 * linked. As this probe is, in fact, enabled, we need to stuff 1
1053 	 * into %eax or %rax. Accordingly, we can bypass all the instruction
1054 	 * emulation logic since we know the inevitable result. It's possible
1055 	 * that a user could construct a scenario where the 'is-enabled'
1056 	 * probe was on some other instruction, but that would be a rather
1057 	 * exotic way to shoot oneself in the foot.
1058 	 */
1059 	if (is_enabled) {
1060 		rp->r_r0 = 1;
1061 		new_pc = rp->r_pc;
1062 		goto done;
1063 	}
1064 
1065 	/*
1066 	 * We emulate certain types of instructions to ensure correctness
1067 	 * (in the case of position dependent instructions) or optimize
1068 	 * common cases. The rest we have the thread execute back in user-
1069 	 * land.
1070 	 */
1071 	switch (tp->ftt_type) {
1072 	case FASTTRAP_T_RET:
1073 	case FASTTRAP_T_RET16:
1074 	{
1075 		uintptr_t dst;
1076 		uintptr_t addr;
1077 		int ret;
1078 
1079 		/*
1080 		 * We have to emulate _every_ facet of the behavior of a ret
1081 		 * instruction including what happens if the load from %esp
1082 		 * fails; in that case, we send a SIGSEGV.
1083 		 */
1084 #ifdef __amd64
1085 		if (p->p_model == DATAMODEL_NATIVE) {
1086 #endif
1087 			ret = fasttrap_fulword((void *)rp->r_sp, &dst);
1088 			addr = rp->r_sp + sizeof (uintptr_t);
1089 #ifdef __amd64
1090 		} else {
1091 			uint32_t dst32;
1092 			ret = fasttrap_fuword32((void *)rp->r_sp, &dst32);
1093 			dst = dst32;
1094 			addr = rp->r_sp + sizeof (uint32_t);
1095 		}
1096 #endif
1097 
1098 		if (ret == -1) {
1099 			fasttrap_sigsegv(p, curthread, rp->r_sp);
1100 			new_pc = pc;
1101 			break;
1102 		}
1103 
1104 		if (tp->ftt_type == FASTTRAP_T_RET16)
1105 			addr += tp->ftt_dest;
1106 
1107 		rp->r_sp = addr;
1108 		new_pc = dst;
1109 		break;
1110 	}
1111 
1112 	case FASTTRAP_T_JCC:
1113 	{
1114 		uint_t taken;
1115 
1116 		switch (tp->ftt_code) {
1117 		case FASTTRAP_JO:
1118 			taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) != 0;
1119 			break;
1120 		case FASTTRAP_JNO:
1121 			taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) == 0;
1122 			break;
1123 		case FASTTRAP_JB:
1124 			taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0;
1125 			break;
1126 		case FASTTRAP_JAE:
1127 			taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0;
1128 			break;
1129 		case FASTTRAP_JE:
1130 			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0;
1131 			break;
1132 		case FASTTRAP_JNE:
1133 			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0;
1134 			break;
1135 		case FASTTRAP_JBE:
1136 			taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0 ||
1137 			    (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0;
1138 			break;
1139 		case FASTTRAP_JA:
1140 			taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0 &&
1141 			    (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0;
1142 			break;
1143 		case FASTTRAP_JS:
1144 			taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) != 0;
1145 			break;
1146 		case FASTTRAP_JNS:
1147 			taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) == 0;
1148 			break;
1149 		case FASTTRAP_JP:
1150 			taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) != 0;
1151 			break;
1152 		case FASTTRAP_JNP:
1153 			taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) == 0;
1154 			break;
1155 		case FASTTRAP_JL:
1156 			taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) !=
1157 			    ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1158 			break;
1159 		case FASTTRAP_JGE:
1160 			taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) ==
1161 			    ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1162 			break;
1163 		case FASTTRAP_JLE:
1164 			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 ||
1165 			    ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) !=
1166 			    ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1167 			break;
1168 		case FASTTRAP_JG:
1169 			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 &&
1170 			    ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) ==
1171 			    ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0);
1172 			break;
1173 
1174 		}
1175 
1176 		if (taken)
1177 			new_pc = tp->ftt_dest;
1178 		else
1179 			new_pc = pc + tp->ftt_size;
1180 		break;
1181 	}
1182 
1183 	case FASTTRAP_T_LOOP:
1184 	{
1185 		uint_t taken;
1186 #ifdef __amd64
1187 		greg_t cx = rp->r_rcx--;
1188 #else
1189 		greg_t cx = rp->r_ecx--;
1190 #endif
1191 
1192 		switch (tp->ftt_code) {
1193 		case FASTTRAP_LOOPNZ:
1194 			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 &&
1195 			    cx != 0;
1196 			break;
1197 		case FASTTRAP_LOOPZ:
1198 			taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 &&
1199 			    cx != 0;
1200 			break;
1201 		case FASTTRAP_LOOP:
1202 			taken = (cx != 0);
1203 			break;
1204 		}
1205 
1206 		if (taken)
1207 			new_pc = tp->ftt_dest;
1208 		else
1209 			new_pc = pc + tp->ftt_size;
1210 		break;
1211 	}
1212 
1213 	case FASTTRAP_T_JCXZ:
1214 	{
1215 #ifdef __amd64
1216 		greg_t cx = rp->r_rcx;
1217 #else
1218 		greg_t cx = rp->r_ecx;
1219 #endif
1220 
1221 		if (cx == 0)
1222 			new_pc = tp->ftt_dest;
1223 		else
1224 			new_pc = pc + tp->ftt_size;
1225 		break;
1226 	}
1227 
1228 	case FASTTRAP_T_PUSHL_EBP:
1229 	{
1230 		int ret;
1231 		uintptr_t addr;
1232 #ifdef __amd64
1233 		if (p->p_model == DATAMODEL_NATIVE) {
1234 #endif
1235 			addr = rp->r_sp - sizeof (uintptr_t);
1236 			ret = fasttrap_sulword((void *)addr, rp->r_fp);
1237 #ifdef __amd64
1238 		} else {
1239 			addr = rp->r_sp - sizeof (uint32_t);
1240 			ret = fasttrap_suword32((void *)addr,
1241 			    (uint32_t)rp->r_fp);
1242 		}
1243 #endif
1244 
1245 		if (ret == -1) {
1246 			fasttrap_sigsegv(p, curthread, addr);
1247 			new_pc = pc;
1248 			break;
1249 		}
1250 
1251 		rp->r_sp = addr;
1252 		new_pc = pc + tp->ftt_size;
1253 		break;
1254 	}
1255 
1256 	case FASTTRAP_T_JMP:
1257 	case FASTTRAP_T_CALL:
1258 		if (tp->ftt_code == 0) {
1259 			new_pc = tp->ftt_dest;
1260 		} else {
1261 			uintptr_t value, addr = tp->ftt_dest;
1262 
1263 			if (tp->ftt_base != FASTTRAP_NOREG)
1264 				addr += fasttrap_getreg(rp, tp->ftt_base);
1265 			if (tp->ftt_index != FASTTRAP_NOREG)
1266 				addr += fasttrap_getreg(rp, tp->ftt_index) <<
1267 				    tp->ftt_scale;
1268 
1269 			if (tp->ftt_code == 1) {
1270 				/*
1271 				 * If there's a segment prefix for this
1272 				 * instruction, we'll need to check permissions
1273 				 * and bounds on the given selector, and adjust
1274 				 * the address accordingly.
1275 				 */
1276 				if (tp->ftt_segment != FASTTRAP_SEG_NONE &&
1277 				    fasttrap_do_seg(tp, rp, &addr) != 0) {
1278 					fasttrap_sigsegv(p, curthread, addr);
1279 					new_pc = pc;
1280 					break;
1281 				}
1282 
1283 #ifdef __amd64
1284 				if (p->p_model == DATAMODEL_NATIVE) {
1285 #endif
1286 					if (fasttrap_fulword((void *)addr,
1287 					    &value) == -1) {
1288 						fasttrap_sigsegv(p, curthread,
1289 						    addr);
1290 						new_pc = pc;
1291 						break;
1292 					}
1293 					new_pc = value;
1294 #ifdef __amd64
1295 				} else {
1296 					uint32_t value32;
1297 					addr = (uintptr_t)(uint32_t)addr;
1298 					if (fasttrap_fuword32((void *)addr,
1299 					    &value32) == -1) {
1300 						fasttrap_sigsegv(p, curthread,
1301 						    addr);
1302 						new_pc = pc;
1303 						break;
1304 					}
1305 					new_pc = value32;
1306 				}
1307 #endif
1308 			} else {
1309 				new_pc = addr;
1310 			}
1311 		}
1312 
1313 		/*
1314 		 * If this is a call instruction, we need to push the return
1315 		 * address onto the stack. If this fails, we send the process
1316 		 * a SIGSEGV and reset the pc to emulate what would happen if
1317 		 * this instruction weren't traced.
1318 		 */
1319 		if (tp->ftt_type == FASTTRAP_T_CALL) {
1320 			int ret;
1321 			uintptr_t addr;
1322 #ifdef __amd64
1323 			if (p->p_model == DATAMODEL_NATIVE) {
1324 				addr = rp->r_sp - sizeof (uintptr_t);
1325 				ret = fasttrap_sulword((void *)addr,
1326 				    pc + tp->ftt_size);
1327 			} else {
1328 #endif
1329 				addr = rp->r_sp - sizeof (uint32_t);
1330 				ret = fasttrap_suword32((void *)addr,
1331 				    (uint32_t)(pc + tp->ftt_size));
1332 #ifdef __amd64
1333 			}
1334 #endif
1335 
1336 			if (ret == -1) {
1337 				fasttrap_sigsegv(p, curthread, addr);
1338 				new_pc = pc;
1339 				break;
1340 			}
1341 
1342 			rp->r_sp = addr;
1343 		}
1344 
1345 		break;
1346 
1347 	case FASTTRAP_T_COMMON:
1348 	{
1349 		uintptr_t addr;
1350 		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 5 + 2];
1351 		uint_t i = 0;
1352 		klwp_t *lwp = ttolwp(curthread);
1353 
1354 		/*
1355 		 * Compute the address of the ulwp_t and step over the
1356 		 * ul_self pointer. The method used to store the user-land
1357 		 * thread pointer is very different on 32- and 64-bit
1358 		 * kernels.
1359 		 */
1360 #if defined(__amd64)
1361 		if (p->p_model == DATAMODEL_LP64) {
1362 			addr = lwp->lwp_pcb.pcb_fsbase;
1363 			addr += sizeof (void *);
1364 		} else {
1365 			addr = lwp->lwp_pcb.pcb_gsbase;
1366 			addr += sizeof (caddr32_t);
1367 		}
1368 #elif defined(__i386)
1369 		addr = USEGD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc);
1370 		addr += sizeof (void *);
1371 #endif
1372 
1373 		/*
1374 		 * Generic Instruction Tracing
1375 		 * ---------------------------
1376 		 *
1377 		 * This is the layout of the scratch space in the user-land
1378 		 * thread structure for our generated instructions.
1379 		 *
1380 		 *	32-bit mode			bytes
1381 		 *	------------------------	-----
1382 		 * a:	<original instruction>		<= 15
1383 		 *	jmp	<pc + tp->ftt_size>	    5
1384 		 * b:	<original instruction>		<= 15
1385 		 *	int	T_DTRACE_RET		    2
1386 		 *					-----
1387 		 *					<= 37
1388 		 *
1389 		 *	64-bit mode			bytes
1390 		 *	------------------------	-----
1391 		 * a:	<original instruction>		<= 15
1392 		 *	jmp	0(%rip)			    6
1393 		 *	<pc + tp->ftt_size>		    8
1394 		 * b:	<original instruction>		<= 15
1395 		 * 	int	T_DTRACE_RET		    2
1396 		 * 					-----
1397 		 * 					<= 46
1398 		 *
1399 		 * The %pc is set to a, and curthread->t_dtrace_astpc is set
1400 		 * to b. If we encounter a signal on the way out of the
1401 		 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
1402 		 * so that we execute the original instruction and re-enter
1403 		 * the kernel rather than redirecting to the next instruction.
1404 		 *
1405 		 * If there are return probes (so we know that we're going to
1406 		 * need to reenter the kernel after executing the original
1407 		 * instruction), the scratch space will just contain the
1408 		 * original instruction followed by an interrupt -- the same
1409 		 * data as at b.
1410 		 *
1411 		 * %rip-relative Addressing
1412 		 * ------------------------
1413 		 *
1414 		 * There's a further complication in 64-bit mode due to %rip-
1415 		 * relative addressing. While this is clearly a beneficial
1416 		 * architectural decision for position independent code, it's
1417 		 * hard not to see it as a personal attack against the pid
1418 		 * provider since before there was a relatively small set of
1419 		 * instructions to emulate; with %rip-relative addressing,
1420 		 * almost every instruction can potentially depend on the
1421 		 * address at which it's executed. Rather than emulating
1422 		 * the broad spectrum of instructions that can now be
1423 		 * position dependent, we emulate jumps and others as in
1424 		 * 32-bit mode, and take a different tack for instructions
1425 		 * using %rip-relative addressing.
1426 		 *
1427 		 * For every instruction that uses the ModRM byte, the
1428 		 * in-kernel disassembler reports its location. We use the
1429 		 * ModRM byte to identify that an instruction uses
1430 		 * %rip-relative addressing and to see what other registers
1431 		 * the instruction uses. To emulate those instructions,
1432 		 * we modify the instruction to be %rax-relative rather than
1433 		 * %rip-relative (or %rcx-relative if the instruction uses
1434 		 * %rax; or %r8- or %r9-relative if the REX.B is present so
1435 		 * we don't have to rewrite the REX prefix). We then load
1436 		 * the value that %rip would have been into the scratch
1437 		 * register and generate an instruction to reset the scratch
1438 		 * register back to its original value. The instruction
1439 		 * sequence looks like this:
1440 		 *
1441 		 *	64-mode %rip-relative		bytes
1442 		 *	------------------------	-----
1443 		 * a:	<modified instruction>		<= 15
1444 		 *	movq	$<value>, %<scratch>	   10
1445 		 *	jmp	0(%rip)			    6
1446 		 *	<pc + tp->ftt_size>		    8
1447 		 * b:	<modified instruction>  	<= 15
1448 		 * 	int	T_DTRACE_RET		    2
1449 		 * 					-----
1450 		 *					<= 56
1451 		 *
1452 		 * We set curthread->t_dtrace_regv so that upon receiving
1453 		 * a signal we can reset the value of the scratch register.
1454 		 */
1455 
1456 		ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE);
1457 
1458 		curthread->t_dtrace_scrpc = addr;
1459 		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1460 		i += tp->ftt_size;
1461 
1462 #ifdef __amd64
1463 		if (tp->ftt_ripmode != 0) {
1464 			greg_t *reg;
1465 
1466 			ASSERT(p->p_model == DATAMODEL_LP64);
1467 			ASSERT(tp->ftt_ripmode &
1468 			    (FASTTRAP_RIP_1 | FASTTRAP_RIP_2));
1469 
1470 			/*
1471 			 * If this was a %rip-relative instruction, we change
1472 			 * it to be either a %rax- or %rcx-relative
1473 			 * instruction (depending on whether those registers
1474 			 * are used as another operand; or %r8- or %r9-
1475 			 * relative depending on the value of REX.B). We then
1476 			 * set that register and generate a movq instruction
1477 			 * to reset the value.
1478 			 */
1479 			if (tp->ftt_ripmode & FASTTRAP_RIP_X)
1480 				scratch[i++] = FASTTRAP_REX(1, 0, 0, 1);
1481 			else
1482 				scratch[i++] = FASTTRAP_REX(1, 0, 0, 0);
1483 
1484 			if (tp->ftt_ripmode & FASTTRAP_RIP_1)
1485 				scratch[i++] = FASTTRAP_MOV_EAX;
1486 			else
1487 				scratch[i++] = FASTTRAP_MOV_ECX;
1488 
1489 			switch (tp->ftt_ripmode) {
1490 			case FASTTRAP_RIP_1:
1491 				reg = &rp->r_rax;
1492 				curthread->t_dtrace_reg = REG_RAX;
1493 				break;
1494 			case FASTTRAP_RIP_2:
1495 				reg = &rp->r_rcx;
1496 				curthread->t_dtrace_reg = REG_RCX;
1497 				break;
1498 			case FASTTRAP_RIP_1 | FASTTRAP_RIP_X:
1499 				reg = &rp->r_r8;
1500 				curthread->t_dtrace_reg = REG_R8;
1501 				break;
1502 			case FASTTRAP_RIP_2 | FASTTRAP_RIP_X:
1503 				reg = &rp->r_r9;
1504 				curthread->t_dtrace_reg = REG_R9;
1505 				break;
1506 			}
1507 
1508 			*(uint64_t *)&scratch[i] = *reg;
1509 			curthread->t_dtrace_regv = *reg;
1510 			*reg = pc + tp->ftt_size;
1511 			i += sizeof (uint64_t);
1512 		}
1513 #endif
1514 
1515 		/*
1516 		 * Generate the branch instruction to what would have
1517 		 * normally been the subsequent instruction. In 32-bit mode,
1518 		 * this is just a relative branch; in 64-bit mode this is a
1519 		 * %rip-relative branch that loads the 64-bit pc value
1520 		 * immediately after the jmp instruction.
1521 		 */
1522 #ifdef __amd64
1523 		if (p->p_model == DATAMODEL_LP64) {
1524 			scratch[i++] = FASTTRAP_GROUP5_OP;
1525 			scratch[i++] = FASTTRAP_MODRM(0, 4, 5);
1526 			*(uint32_t *)&scratch[i] = 0;
1527 			i += sizeof (uint32_t);
1528 			*(uint64_t *)&scratch[i] = pc + tp->ftt_size;
1529 			i += sizeof (uint64_t);
1530 		} else {
1531 #endif
1532 			/*
1533 			 * Set up the jmp to the next instruction; note that
1534 			 * the size of the traced instruction cancels out.
1535 			 */
1536 			scratch[i++] = FASTTRAP_JMP32;
1537 			*(uint32_t *)&scratch[i] = pc - addr - 5;
1538 			i += sizeof (uint32_t);
1539 #ifdef __amd64
1540 		}
1541 #endif
1542 
1543 		curthread->t_dtrace_astpc = addr + i;
1544 		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1545 		i += tp->ftt_size;
1546 		scratch[i++] = FASTTRAP_INT;
1547 		scratch[i++] = T_DTRACE_RET;
1548 
1549 		if (fasttrap_copyout(scratch, (char *)addr, i)) {
1550 			fasttrap_sigtrap(p, curthread, pc);
1551 			new_pc = pc;
1552 			break;
1553 		}
1554 
1555 		if (tp->ftt_retids != NULL) {
1556 			curthread->t_dtrace_step = 1;
1557 			curthread->t_dtrace_ret = 1;
1558 			new_pc = curthread->t_dtrace_astpc;
1559 		} else {
1560 			new_pc = curthread->t_dtrace_scrpc;
1561 		}
1562 
1563 		curthread->t_dtrace_pc = pc;
1564 		curthread->t_dtrace_npc = pc + tp->ftt_size;
1565 		curthread->t_dtrace_on = 1;
1566 		break;
1567 	}
1568 
1569 	default:
1570 		panic("fasttrap: mishandled an instruction");
1571 	}
1572 
1573 done:
1574 	/*
1575 	 * If there were no return probes when we first found the tracepoint,
1576 	 * we should feel no obligation to honor any return probes that were
1577 	 * subsequently enabled -- they'll just have to wait until the next
1578 	 * time around.
1579 	 */
1580 	if (tp->ftt_retids != NULL) {
1581 		/*
1582 		 * We need to wait until the results of the instruction are
1583 		 * apparent before invoking any return probes. If this
1584 		 * instruction was emulated we can just call
1585 		 * fasttrap_return_common(); if it needs to be executed, we
1586 		 * need to wait until the user thread returns to the kernel.
1587 		 */
1588 		if (tp->ftt_type != FASTTRAP_T_COMMON) {
1589 			/*
1590 			 * Set the program counter to the address of the traced
1591 			 * instruction so that it looks right in ustack()
1592 			 * output. We had previously set it to the end of the
1593 			 * instruction to simplify %rip-relative addressing.
1594 			 */
1595 			rp->r_pc = pc;
1596 
1597 			fasttrap_return_common(rp, pc, pid, new_pc);
1598 		} else {
1599 			ASSERT(curthread->t_dtrace_ret != 0);
1600 			ASSERT(curthread->t_dtrace_pc == pc);
1601 			ASSERT(curthread->t_dtrace_scrpc != 0);
1602 			ASSERT(new_pc == curthread->t_dtrace_astpc);
1603 		}
1604 	}
1605 
1606 	ASSERT(new_pc != 0);
1607 	rp->r_pc = new_pc;
1608 
1609 	return (0);
1610 }
1611 
1612 int
1613 fasttrap_return_probe(struct regs *rp)
1614 {
1615 	proc_t *p = curproc;
1616 	uintptr_t pc = curthread->t_dtrace_pc;
1617 	uintptr_t npc = curthread->t_dtrace_npc;
1618 
1619 	curthread->t_dtrace_pc = 0;
1620 	curthread->t_dtrace_npc = 0;
1621 	curthread->t_dtrace_scrpc = 0;
1622 	curthread->t_dtrace_astpc = 0;
1623 
1624 	/*
1625 	 * Treat a child created by a call to vfork(2) as if it were its
1626 	 * parent. We know that there's only one thread of control in such a
1627 	 * process: this one.
1628 	 */
1629 	while (p->p_flag & SVFORK) {
1630 		p = p->p_parent;
1631 	}
1632 
1633 	/*
1634 	 * We set rp->r_pc to the address of the traced instruction so
1635 	 * that it appears to dtrace_probe() that we're on the original
1636 	 * instruction, and so that the user can't easily detect our
1637 	 * complex web of lies. dtrace_return_probe() (our caller)
1638 	 * will correctly set %pc after we return.
1639 	 */
1640 	rp->r_pc = pc;
1641 
1642 	fasttrap_return_common(rp, pc, p->p_pid, npc);
1643 
1644 	return (0);
1645 }
1646 
1647 /*ARGSUSED*/
1648 uint64_t
1649 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1650     int aframes)
1651 {
1652 	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 1, argno));
1653 }
1654 
1655 /*ARGSUSED*/
1656 uint64_t
1657 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1658     int aframes)
1659 {
1660 	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 0, argno));
1661 }
1662 
1663 static ulong_t
1664 fasttrap_getreg(struct regs *rp, uint_t reg)
1665 {
1666 #ifdef __amd64
1667 	switch (reg) {
1668 	case REG_R15:		return (rp->r_r15);
1669 	case REG_R14:		return (rp->r_r14);
1670 	case REG_R13:		return (rp->r_r13);
1671 	case REG_R12:		return (rp->r_r12);
1672 	case REG_R11:		return (rp->r_r11);
1673 	case REG_R10:		return (rp->r_r10);
1674 	case REG_R9:		return (rp->r_r9);
1675 	case REG_R8:		return (rp->r_r8);
1676 	case REG_RDI:		return (rp->r_rdi);
1677 	case REG_RSI:		return (rp->r_rsi);
1678 	case REG_RBP:		return (rp->r_rbp);
1679 	case REG_RBX:		return (rp->r_rbx);
1680 	case REG_RDX:		return (rp->r_rdx);
1681 	case REG_RCX:		return (rp->r_rcx);
1682 	case REG_RAX:		return (rp->r_rax);
1683 	case REG_TRAPNO:	return (rp->r_trapno);
1684 	case REG_ERR:		return (rp->r_err);
1685 	case REG_RIP:		return (rp->r_rip);
1686 	case REG_CS:		return (rp->r_cs);
1687 	case REG_RFL:		return (rp->r_rfl);
1688 	case REG_RSP:		return (rp->r_rsp);
1689 	case REG_SS:		return (rp->r_ss);
1690 	case REG_FS:		return (rp->r_fs);
1691 	case REG_GS:		return (rp->r_gs);
1692 	case REG_DS:		return (rp->r_ds);
1693 	case REG_ES:		return (rp->r_es);
1694 	case REG_FSBASE:	return (rp->r_fsbase);
1695 	case REG_GSBASE:	return (rp->r_gsbase);
1696 	}
1697 
1698 	panic("dtrace: illegal register constant");
1699 	/*NOTREACHED*/
1700 #else
1701 	if (reg >= _NGREG)
1702 		panic("dtrace: illegal register constant");
1703 
1704 	return (((greg_t *)&rp->r_gs)[reg]);
1705 #endif
1706 }
1707