xref: /freebsd/sys/cddl/dev/dtrace/amd64/dtrace_isa.c (revision 119b75925c562202145d7bac7b676b98029c6cb9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  *
22  * $FreeBSD$
23  */
24 /*
25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  */
28 #include <sys/cdefs.h>
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/stack.h>
34 #include <sys/pcpu.h>
35 
36 #include <machine/frame.h>
37 #include <machine/md_var.h>
38 #include <machine/reg.h>
39 #include <machine/stack.h>
40 
41 #include <vm/vm.h>
42 #include <vm/vm_param.h>
43 #include <vm/pmap.h>
44 
45 #include "regset.h"
46 
/*
 * Unchecked user-memory fetch primitives.  They are only declared here;
 * the dtrace_fuword*() wrappers in this file validate the user address
 * before handing it to these routines.
 */
uint8_t dtrace_fuword8_nocheck(void *uaddr);
uint16_t dtrace_fuword16_nocheck(void *uaddr);
uint32_t dtrace_fuword32_nocheck(void *uaddr);
uint64_t dtrace_fuword64_nocheck(void *uaddr);

/*
 * Upper bound on the number of iterations dtrace_getustack_common() makes
 * while walking a user frame chain; protects against circular stacks.
 */
int	dtrace_ustackdepth_max = 2048;
53 
54 void
55 dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
56     uint32_t *intrpc)
57 {
58 	int depth = 0;
59 	register_t rbp;
60 	struct amd64_frame *frame;
61 	vm_offset_t callpc;
62 	pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller;
63 
64 	if (intrpc != 0)
65 		pcstack[depth++] = (pc_t) intrpc;
66 
67 	aframes++;
68 
69 	__asm __volatile("movq %%rbp,%0" : "=r" (rbp));
70 
71 	frame = (struct amd64_frame *)rbp;
72 	while (depth < pcstack_limit) {
73 		if (!INKERNEL((long) frame))
74 			break;
75 
76 		callpc = frame->f_retaddr;
77 
78 		if (!INKERNEL(callpc))
79 			break;
80 
81 		if (aframes > 0) {
82 			aframes--;
83 			if ((aframes == 0) && (caller != 0)) {
84 				pcstack[depth++] = caller;
85 			}
86 		}
87 		else {
88 			pcstack[depth++] = callpc;
89 		}
90 
91 		if (frame->f_frame <= frame ||
92 		    (vm_offset_t)frame->f_frame >= curthread->td_kstack +
93 		    curthread->td_kstack_pages * PAGE_SIZE)
94 			break;
95 		frame = frame->f_frame;
96 	}
97 
98 	for (; depth < pcstack_limit; depth++) {
99 		pcstack[depth] = 0;
100 	}
101 }
102 
103 static int
104 dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc,
105     uintptr_t sp)
106 {
107 	uintptr_t oldsp;
108 	volatile uint16_t *flags =
109 	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
110 	int ret = 0;
111 
112 	ASSERT(pcstack == NULL || pcstack_limit > 0);
113 	ASSERT(dtrace_ustackdepth_max > 0);
114 
115 	while (pc != 0) {
116 		/*
117 		 * We limit the number of times we can go around this
118 		 * loop to account for a circular stack.
119 		 */
120 		if (ret++ >= dtrace_ustackdepth_max) {
121 			*flags |= CPU_DTRACE_BADSTACK;
122 			cpu_core[curcpu].cpuc_dtrace_illval = sp;
123 			break;
124 		}
125 
126 		if (pcstack != NULL) {
127 			*pcstack++ = (uint64_t)pc;
128 			pcstack_limit--;
129 			if (pcstack_limit <= 0)
130 				break;
131 		}
132 
133 		if (sp == 0)
134 			break;
135 
136 		oldsp = sp;
137 
138 		pc = dtrace_fuword64((void *)(sp +
139 			offsetof(struct amd64_frame, f_retaddr)));
140 		sp = dtrace_fuword64((void *)sp);
141 
142 		if (sp == oldsp) {
143 			*flags |= CPU_DTRACE_BADSTACK;
144 			cpu_core[curcpu].cpuc_dtrace_illval = sp;
145 			break;
146 		}
147 
148 		/*
149 		 * This is totally bogus:  if we faulted, we're going to clear
150 		 * the fault and break.  This is to deal with the apparently
151 		 * broken Java stacks on x86.
152 		 */
153 		if (*flags & CPU_DTRACE_FAULT) {
154 			*flags &= ~CPU_DTRACE_FAULT;
155 			break;
156 		}
157 	}
158 
159 	return (ret);
160 }
161 
162 void
163 dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
164 {
165 	proc_t *p = curproc;
166 	struct trapframe *tf;
167 	uintptr_t pc, sp, fp;
168 	volatile uint16_t *flags =
169 	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
170 	int n;
171 
172 	if (*flags & CPU_DTRACE_FAULT)
173 		return;
174 
175 	if (pcstack_limit <= 0)
176 		return;
177 
178 	/*
179 	 * If there's no user context we still need to zero the stack.
180 	 */
181 	if (p == NULL || (tf = curthread->td_frame) == NULL)
182 		goto zero;
183 
184 	*pcstack++ = (uint64_t)p->p_pid;
185 	pcstack_limit--;
186 
187 	if (pcstack_limit <= 0)
188 		return;
189 
190 	pc = tf->tf_rip;
191 	fp = tf->tf_rbp;
192 	sp = tf->tf_rsp;
193 
194 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
195 		/*
196 		 * In an entry probe.  The frame pointer has not yet been
197 		 * pushed (that happens in the function prologue).  The
198 		 * best approach is to add the current pc as a missing top
199 		 * of stack and back the pc up to the caller, which is stored
200 		 * at the current stack pointer address since the call
201 		 * instruction puts it there right before the branch.
202 		 */
203 
204 		*pcstack++ = (uint64_t)pc;
205 		pcstack_limit--;
206 		if (pcstack_limit <= 0)
207 			return;
208 
209 		pc = dtrace_fuword64((void *) sp);
210 	}
211 
212 	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
213 	ASSERT(n >= 0);
214 	ASSERT(n <= pcstack_limit);
215 
216 	pcstack += n;
217 	pcstack_limit -= n;
218 
219 zero:
220 	while (pcstack_limit-- > 0)
221 		*pcstack++ = 0;
222 }
223 
224 int
225 dtrace_getustackdepth(void)
226 {
227 	proc_t *p = curproc;
228 	struct trapframe *tf;
229 	uintptr_t pc, fp, sp;
230 	int n = 0;
231 
232 	if (p == NULL || (tf = curthread->td_frame) == NULL)
233 		return (0);
234 
235 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
236 		return (-1);
237 
238 	pc = tf->tf_rip;
239 	fp = tf->tf_rbp;
240 	sp = tf->tf_rsp;
241 
242 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
243 		/*
244 		 * In an entry probe.  The frame pointer has not yet been
245 		 * pushed (that happens in the function prologue).  The
246 		 * best approach is to add the current pc as a missing top
247 		 * of stack and back the pc up to the caller, which is stored
248 		 * at the current stack pointer address since the call
249 		 * instruction puts it there right before the branch.
250 		 */
251 
252 		pc = dtrace_fuword64((void *) sp);
253 		n++;
254 	}
255 
256 	n += dtrace_getustack_common(NULL, 0, pc, fp);
257 
258 	return (n);
259 }
260 
261 void
262 dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
263 {
264 	proc_t *p = curproc;
265 	struct trapframe *tf;
266 	uintptr_t pc, sp, fp;
267 	volatile uint16_t *flags =
268 	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
269 #ifdef notyet	/* XXX signal stack */
270 	uintptr_t oldcontext;
271 	size_t s1, s2;
272 #endif
273 
274 	if (*flags & CPU_DTRACE_FAULT)
275 		return;
276 
277 	if (pcstack_limit <= 0)
278 		return;
279 
280 	/*
281 	 * If there's no user context we still need to zero the stack.
282 	 */
283 	if (p == NULL || (tf = curthread->td_frame) == NULL)
284 		goto zero;
285 
286 	*pcstack++ = (uint64_t)p->p_pid;
287 	pcstack_limit--;
288 
289 	if (pcstack_limit <= 0)
290 		return;
291 
292 	pc = tf->tf_rip;
293 	sp = tf->tf_rsp;
294 	fp = tf->tf_rbp;
295 
296 #ifdef notyet /* XXX signal stack */
297 	oldcontext = lwp->lwp_oldcontext;
298 	s1 = sizeof (struct xframe) + 2 * sizeof (long);
299 	s2 = s1 + sizeof (siginfo_t);
300 #endif
301 
302 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
303 		*pcstack++ = (uint64_t)pc;
304 		*fpstack++ = 0;
305 		pcstack_limit--;
306 		if (pcstack_limit <= 0)
307 			return;
308 
309 		pc = dtrace_fuword64((void *)sp);
310 	}
311 
312 	while (pc != 0) {
313 		*pcstack++ = (uint64_t)pc;
314 		*fpstack++ = fp;
315 		pcstack_limit--;
316 		if (pcstack_limit <= 0)
317 			break;
318 
319 		if (fp == 0)
320 			break;
321 
322 #ifdef notyet /* XXX signal stack */
323 		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
324 			ucontext_t *ucp = (ucontext_t *)oldcontext;
325 			greg_t *gregs = ucp->uc_mcontext.gregs;
326 
327 			sp = dtrace_fulword(&gregs[REG_FP]);
328 			pc = dtrace_fulword(&gregs[REG_PC]);
329 
330 			oldcontext = dtrace_fulword(&ucp->uc_link);
331 		} else
332 #endif /* XXX */
333 		{
334 			pc = dtrace_fuword64((void *)(fp +
335 				offsetof(struct amd64_frame, f_retaddr)));
336 			fp = dtrace_fuword64((void *)fp);
337 		}
338 
339 		/*
340 		 * This is totally bogus:  if we faulted, we're going to clear
341 		 * the fault and break.  This is to deal with the apparently
342 		 * broken Java stacks on x86.
343 		 */
344 		if (*flags & CPU_DTRACE_FAULT) {
345 			*flags &= ~CPU_DTRACE_FAULT;
346 			break;
347 		}
348 	}
349 
350 zero:
351 	while (pcstack_limit-- > 0)
352 		*pcstack++ = 0;
353 }
354 
355 /*ARGSUSED*/
356 uint64_t
357 dtrace_getarg(int arg, int aframes)
358 {
359 	uintptr_t val;
360 	struct amd64_frame *fp = (struct amd64_frame *)dtrace_getfp();
361 	uintptr_t *stack;
362 	int i;
363 
364 	/*
365 	 * A total of 6 arguments are passed via registers; any argument with
366 	 * index of 5 or lower is therefore in a register.
367 	 */
368 	int inreg = 5;
369 
370 	for (i = 1; i <= aframes; i++) {
371 		fp = fp->f_frame;
372 
373 		if (P2ROUNDUP(fp->f_retaddr, 16) ==
374 		    (long)dtrace_invop_callsite) {
375 			/*
376 			 * In the case of amd64, we will use the pointer to the
377 			 * regs structure that was pushed when we took the
378 			 * trap.  To get this structure, we must increment
379 			 * beyond the frame structure, and then again beyond
380 			 * the calling RIP stored in dtrace_invop().  If the
381 			 * argument that we're seeking is passed on the stack,
382 			 * we'll pull the true stack pointer out of the saved
383 			 * registers and decrement our argument by the number
384 			 * of arguments passed in registers; if the argument
385 			 * we're seeking is passed in regsiters, we can just
386 			 * load it directly.
387 			 */
388 			struct trapframe *tf =
389 			    (struct trapframe *)((uintptr_t)&fp[1]);
390 
391 			if (arg <= inreg) {
392 				switch (arg) {
393 				case 0:
394 					stack = (uintptr_t *)&tf->tf_rdi;
395 					break;
396 				case 1:
397 					stack = (uintptr_t *)&tf->tf_rsi;
398 					break;
399 				case 2:
400 					stack = (uintptr_t *)&tf->tf_rdx;
401 					break;
402 				case 3:
403 					stack = (uintptr_t *)&tf->tf_rcx;
404 					break;
405 				case 4:
406 					stack = (uintptr_t *)&tf->tf_r8;
407 					break;
408 				case 5:
409 					stack = (uintptr_t *)&tf->tf_r9;
410 					break;
411 				}
412 				arg = 0;
413 			} else {
414 				stack = (uintptr_t *)(tf->tf_rsp);
415 				arg -= inreg;
416 			}
417 			goto load;
418 		}
419 
420 	}
421 
422 	/*
423 	 * We know that we did not come through a trap to get into
424 	 * dtrace_probe() -- the provider simply called dtrace_probe()
425 	 * directly.  As this is the case, we need to shift the argument
426 	 * that we're looking for:  the probe ID is the first argument to
427 	 * dtrace_probe(), so the argument n will actually be found where
428 	 * one would expect to find argument (n + 1).
429 	 */
430 	arg++;
431 
432 	if (arg <= inreg) {
433 		/*
434 		 * This shouldn't happen.  If the argument is passed in a
435 		 * register then it should have been, well, passed in a
436 		 * register...
437 		 */
438 		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
439 		return (0);
440 	}
441 
442 	arg -= (inreg + 1);
443 	stack = (uintptr_t *)fp + 2;
444 
445 load:
446 	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
447 	val = stack[arg];
448 	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
449 
450 	return (val);
451 	return (0);
452 }
453 
454 int
455 dtrace_getstackdepth(int aframes)
456 {
457 	int depth = 0;
458 	struct amd64_frame *frame;
459 	vm_offset_t rbp;
460 
461 	aframes++;
462 	rbp = dtrace_getfp();
463 	frame = (struct amd64_frame *)rbp;
464 	depth++;
465 	for(;;) {
466 		if (!INKERNEL((long) frame))
467 			break;
468 		if (!INKERNEL((long) frame->f_frame))
469 			break;
470 		depth++;
471 		if (frame->f_frame <= frame ||
472 		    (vm_offset_t)frame->f_frame >= curthread->td_kstack +
473 		    curthread->td_kstack_pages * PAGE_SIZE)
474 			break;
475 		frame = frame->f_frame;
476 	}
477 	if (depth < aframes)
478 		return 0;
479 	else
480 		return depth - aframes;
481 }
482 
483 ulong_t
484 dtrace_getreg(struct trapframe *rp, uint_t reg)
485 {
486 	/* This table is dependent on reg.d. */
487 	int regmap[] = {
488 		REG_GS,		/* 0  GS */
489 		REG_FS,		/* 1  FS */
490 		REG_ES,		/* 2  ES */
491 		REG_DS,		/* 3  DS */
492 		REG_RDI,	/* 4  EDI */
493 		REG_RSI,	/* 5  ESI */
494 		REG_RBP,	/* 6  EBP, REG_FP */
495 		REG_RSP,	/* 7  ESP */
496 		REG_RBX,	/* 8  EBX, REG_R1 */
497 		REG_RDX,	/* 9  EDX */
498 		REG_RCX,	/* 10 ECX */
499 		REG_RAX,	/* 11 EAX, REG_R0 */
500 		REG_TRAPNO,	/* 12 TRAPNO */
501 		REG_ERR,	/* 13 ERR */
502 		REG_RIP,	/* 14 EIP, REG_PC */
503 		REG_CS,		/* 15 CS */
504 		REG_RFL,	/* 16 EFL, REG_PS */
505 		REG_RSP,	/* 17 UESP, REG_SP */
506 		REG_SS		/* 18 SS */
507 	};
508 
509 	if (reg <= SS) {
510 		if (reg >= sizeof (regmap) / sizeof (int)) {
511 			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
512 			return (0);
513 		}
514 
515 		reg = regmap[reg];
516 	} else {
517 		/* This is dependent on reg.d. */
518 		reg -= SS + 1;
519 	}
520 
521 	switch (reg) {
522 	case REG_RDI:
523 		return (rp->tf_rdi);
524 	case REG_RSI:
525 		return (rp->tf_rsi);
526 	case REG_RDX:
527 		return (rp->tf_rdx);
528 	case REG_RCX:
529 		return (rp->tf_rcx);
530 	case REG_R8:
531 		return (rp->tf_r8);
532 	case REG_R9:
533 		return (rp->tf_r9);
534 	case REG_RAX:
535 		return (rp->tf_rax);
536 	case REG_RBX:
537 		return (rp->tf_rbx);
538 	case REG_RBP:
539 		return (rp->tf_rbp);
540 	case REG_R10:
541 		return (rp->tf_r10);
542 	case REG_R11:
543 		return (rp->tf_r11);
544 	case REG_R12:
545 		return (rp->tf_r12);
546 	case REG_R13:
547 		return (rp->tf_r13);
548 	case REG_R14:
549 		return (rp->tf_r14);
550 	case REG_R15:
551 		return (rp->tf_r15);
552 	case REG_DS:
553 		return (rp->tf_ds);
554 	case REG_ES:
555 		return (rp->tf_es);
556 	case REG_FS:
557 		return (rp->tf_fs);
558 	case REG_GS:
559 		return (rp->tf_gs);
560 	case REG_TRAPNO:
561 		return (rp->tf_trapno);
562 	case REG_ERR:
563 		return (rp->tf_err);
564 	case REG_RIP:
565 		return (rp->tf_rip);
566 	case REG_CS:
567 		return (rp->tf_cs);
568 	case REG_SS:
569 		return (rp->tf_ss);
570 	case REG_RFL:
571 		return (rp->tf_rflags);
572 	case REG_RSP:
573 		return (rp->tf_rsp);
574 	default:
575 		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
576 		return (0);
577 	}
578 }
579 
580 static int
581 dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size)
582 {
583 	ASSERT(INKERNEL(kaddr) && kaddr + size >= kaddr);
584 
585 	if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) {
586 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
587 		cpu_core[curcpu].cpuc_dtrace_illval = uaddr;
588 		return (0);
589 	}
590 
591 	return (1);
592 }
593 
/*
 * Copy 'size' bytes from user address uaddr to kernel address kaddr via
 * dtrace_copy(), provided dtrace_copycheck() accepts the user range.
 * The flags argument is not used here.
 */
void
dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copy(uaddr, kaddr, size);
}
601 
/*
 * Copy 'size' bytes from kernel address kaddr to user address uaddr via
 * dtrace_copy(), provided dtrace_copycheck() accepts the user range.
 * The flags argument is not used here.
 */
void
dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copy(kaddr, uaddr, size);
}
609 
/*
 * Copy a string of at most 'size' bytes from user address uaddr to kernel
 * address kaddr via dtrace_copystr(), provided dtrace_copycheck() accepts
 * the user range.  flags is forwarded to dtrace_copystr().
 */
void
dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copystr(uaddr, kaddr, size, flags);
}
617 
/*
 * Copy a string of at most 'size' bytes from kernel address kaddr to user
 * address uaddr via dtrace_copystr(), provided dtrace_copycheck() accepts
 * the user range.  flags is forwarded to dtrace_copystr().
 */
void
dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copystr(kaddr, uaddr, size, flags);
}
625 
626 uint8_t
627 dtrace_fuword8(void *uaddr)
628 {
629 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
630 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
631 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
632 		return (0);
633 	}
634 	return (dtrace_fuword8_nocheck(uaddr));
635 }
636 
637 uint16_t
638 dtrace_fuword16(void *uaddr)
639 {
640 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
641 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
642 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
643 		return (0);
644 	}
645 	return (dtrace_fuword16_nocheck(uaddr));
646 }
647 
648 uint32_t
649 dtrace_fuword32(void *uaddr)
650 {
651 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
652 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
653 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
654 		return (0);
655 	}
656 	return (dtrace_fuword32_nocheck(uaddr));
657 }
658 
659 uint64_t
660 dtrace_fuword64(void *uaddr)
661 {
662 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
663 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
664 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
665 		return (0);
666 	}
667 	return (dtrace_fuword64_nocheck(uaddr));
668 }
669