xref: /freebsd/sys/cddl/dev/dtrace/amd64/dtrace_isa.c (revision 5eadfbfcd01e23d3de1958c013ca556dbf0b5458)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  *
22  * $FreeBSD$
23  */
24 /*
25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  */
28 #include <sys/cdefs.h>
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/stack.h>
34 #include <sys/pcpu.h>
35 
36 #include <machine/frame.h>
37 #include <machine/md_var.h>
38 #include <machine/reg.h>
39 #include <machine/stack.h>
40 
41 #include <vm/vm.h>
42 #include <vm/vm_param.h>
43 #include <vm/pmap.h>
44 
45 #include "regset.h"
46 
/*
 * Unchecked user-memory fetch primitives.  Only their prototypes appear
 * here; the dtrace_fuword*() wrappers in this file call them once the
 * user address has passed its range check.
 */
uint8_t dtrace_fuword8_nocheck(void *uaddr);
uint16_t dtrace_fuword16_nocheck(void *uaddr);
uint32_t dtrace_fuword32_nocheck(void *uaddr);
uint64_t dtrace_fuword64_nocheck(void *uaddr);

/*
 * Maximum number of iterations dtrace_getustack_common() performs before
 * it gives up and flags CPU_DTRACE_BADSTACK (protection against circular
 * user stacks).
 */
int	dtrace_ustackdepth_max = 2048;
53 
54 void
55 dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
56     uint32_t *intrpc)
57 {
58 	int depth = 0;
59 	register_t rbp;
60 	struct amd64_frame *frame;
61 	vm_offset_t callpc;
62 	pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller;
63 
64 	if (intrpc != 0)
65 		pcstack[depth++] = (pc_t) intrpc;
66 
67 	aframes++;
68 
69 	__asm __volatile("movq %%rbp,%0" : "=r" (rbp));
70 
71 	frame = (struct amd64_frame *)rbp;
72 	while (depth < pcstack_limit) {
73 		if (!INKERNEL((long) frame))
74 			break;
75 
76 		callpc = frame->f_retaddr;
77 
78 		if (!INKERNEL(callpc))
79 			break;
80 
81 		if (aframes > 0) {
82 			aframes--;
83 			if ((aframes == 0) && (caller != 0)) {
84 				pcstack[depth++] = caller;
85 			}
86 		}
87 		else {
88 			pcstack[depth++] = callpc;
89 		}
90 
91 		if (frame->f_frame <= frame ||
92 		    (vm_offset_t)frame->f_frame >= curthread->td_kstack +
93 		    curthread->td_kstack_pages * PAGE_SIZE)
94 			break;
95 		frame = frame->f_frame;
96 	}
97 
98 	for (; depth < pcstack_limit; depth++) {
99 		pcstack[depth] = 0;
100 	}
101 }
102 
103 static int
104 dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc,
105     uintptr_t sp)
106 {
107 	uintptr_t oldsp;
108 	volatile uint16_t *flags =
109 	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
110 	int ret = 0;
111 
112 	ASSERT(pcstack == NULL || pcstack_limit > 0);
113 	ASSERT(dtrace_ustackdepth_max > 0);
114 
115 	while (pc != 0) {
116 		/*
117 		 * We limit the number of times we can go around this
118 		 * loop to account for a circular stack.
119 		 */
120 		if (ret++ >= dtrace_ustackdepth_max) {
121 			*flags |= CPU_DTRACE_BADSTACK;
122 			cpu_core[curcpu].cpuc_dtrace_illval = sp;
123 			break;
124 		}
125 
126 		if (pcstack != NULL) {
127 			*pcstack++ = (uint64_t)pc;
128 			pcstack_limit--;
129 			if (pcstack_limit <= 0)
130 				break;
131 		}
132 
133 		if (sp == 0)
134 			break;
135 
136 		oldsp = sp;
137 
138 		pc = dtrace_fuword64((void *)(sp +
139 			offsetof(struct amd64_frame, f_retaddr)));
140 		sp = dtrace_fuword64((void *)sp);
141 
142 		if (sp == oldsp) {
143 			*flags |= CPU_DTRACE_BADSTACK;
144 			cpu_core[curcpu].cpuc_dtrace_illval = sp;
145 			break;
146 		}
147 
148 		/*
149 		 * This is totally bogus:  if we faulted, we're going to clear
150 		 * the fault and break.  This is to deal with the apparently
151 		 * broken Java stacks on x86.
152 		 */
153 		if (*flags & CPU_DTRACE_FAULT) {
154 			*flags &= ~CPU_DTRACE_FAULT;
155 			break;
156 		}
157 	}
158 
159 	return (ret);
160 }
161 
162 void
163 dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
164 {
165 	proc_t *p = curproc;
166 	struct trapframe *tf;
167 	uintptr_t pc, sp, fp;
168 	volatile uint16_t *flags =
169 	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
170 	int n;
171 
172 	if (*flags & CPU_DTRACE_FAULT)
173 		return;
174 
175 	if (pcstack_limit <= 0)
176 		return;
177 
178 	/*
179 	 * If there's no user context we still need to zero the stack.
180 	 */
181 	if (p == NULL || (tf = curthread->td_frame) == NULL)
182 		goto zero;
183 
184 	*pcstack++ = (uint64_t)p->p_pid;
185 	pcstack_limit--;
186 
187 	if (pcstack_limit <= 0)
188 		return;
189 
190 	pc = tf->tf_rip;
191 	fp = tf->tf_rbp;
192 	sp = tf->tf_rsp;
193 
194 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
195 		/*
196 		 * In an entry probe.  The frame pointer has not yet been
197 		 * pushed (that happens in the function prologue).  The
198 		 * best approach is to add the current pc as a missing top
199 		 * of stack and back the pc up to the caller, which is stored
200 		 * at the current stack pointer address since the call
201 		 * instruction puts it there right before the branch.
202 		 */
203 
204 		*pcstack++ = (uint64_t)pc;
205 		pcstack_limit--;
206 		if (pcstack_limit <= 0)
207 			return;
208 
209 		pc = dtrace_fuword64((void *) sp);
210 	}
211 
212 	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
213 	ASSERT(n >= 0);
214 	ASSERT(n <= pcstack_limit);
215 
216 	pcstack += n;
217 	pcstack_limit -= n;
218 
219 zero:
220 	while (pcstack_limit-- > 0)
221 		*pcstack++ = 0;
222 }
223 
224 int
225 dtrace_getustackdepth(void)
226 {
227 	proc_t *p = curproc;
228 	struct trapframe *tf;
229 	uintptr_t pc, fp, sp;
230 	int n = 0;
231 
232 	if (p == NULL || (tf = curthread->td_frame) == NULL)
233 		return (0);
234 
235 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
236 		return (-1);
237 
238 	pc = tf->tf_rip;
239 	fp = tf->tf_rbp;
240 	sp = tf->tf_rsp;
241 
242 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
243 		/*
244 		 * In an entry probe.  The frame pointer has not yet been
245 		 * pushed (that happens in the function prologue).  The
246 		 * best approach is to add the current pc as a missing top
247 		 * of stack and back the pc up to the caller, which is stored
248 		 * at the current stack pointer address since the call
249 		 * instruction puts it there right before the branch.
250 		 */
251 
252 		pc = dtrace_fuword64((void *) sp);
253 		n++;
254 	}
255 
256 	n += dtrace_getustack_common(NULL, 0, pc, fp);
257 
258 	return (n);
259 }
260 
261 void
262 dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
263 {
264 	proc_t *p = curproc;
265 	struct trapframe *tf;
266 	uintptr_t pc, sp, fp;
267 	volatile uint16_t *flags =
268 	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
269 #ifdef notyet	/* XXX signal stack */
270 	uintptr_t oldcontext;
271 	size_t s1, s2;
272 #endif
273 
274 	if (*flags & CPU_DTRACE_FAULT)
275 		return;
276 
277 	if (pcstack_limit <= 0)
278 		return;
279 
280 	/*
281 	 * If there's no user context we still need to zero the stack.
282 	 */
283 	if (p == NULL || (tf = curthread->td_frame) == NULL)
284 		goto zero;
285 
286 	*pcstack++ = (uint64_t)p->p_pid;
287 	pcstack_limit--;
288 
289 	if (pcstack_limit <= 0)
290 		return;
291 
292 	pc = tf->tf_rip;
293 	sp = tf->tf_rsp;
294 	fp = tf->tf_rbp;
295 
296 #ifdef notyet /* XXX signal stack */
297 	oldcontext = lwp->lwp_oldcontext;
298 	s1 = sizeof (struct xframe) + 2 * sizeof (long);
299 	s2 = s1 + sizeof (siginfo_t);
300 #endif
301 
302 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
303 		*pcstack++ = (uint64_t)pc;
304 		*fpstack++ = 0;
305 		pcstack_limit--;
306 		if (pcstack_limit <= 0)
307 			return;
308 
309 		pc = dtrace_fuword64((void *)sp);
310 	}
311 
312 	while (pc != 0) {
313 		*pcstack++ = (uint64_t)pc;
314 		*fpstack++ = fp;
315 		pcstack_limit--;
316 		if (pcstack_limit <= 0)
317 			break;
318 
319 		if (fp == 0)
320 			break;
321 
322 #ifdef notyet /* XXX signal stack */
323 		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
324 			ucontext_t *ucp = (ucontext_t *)oldcontext;
325 			greg_t *gregs = ucp->uc_mcontext.gregs;
326 
327 			sp = dtrace_fulword(&gregs[REG_FP]);
328 			pc = dtrace_fulword(&gregs[REG_PC]);
329 
330 			oldcontext = dtrace_fulword(&ucp->uc_link);
331 		} else
332 #endif /* XXX */
333 		{
334 			pc = dtrace_fuword64((void *)(fp +
335 				offsetof(struct amd64_frame, f_retaddr)));
336 			fp = dtrace_fuword64((void *)fp);
337 		}
338 
339 		/*
340 		 * This is totally bogus:  if we faulted, we're going to clear
341 		 * the fault and break.  This is to deal with the apparently
342 		 * broken Java stacks on x86.
343 		 */
344 		if (*flags & CPU_DTRACE_FAULT) {
345 			*flags &= ~CPU_DTRACE_FAULT;
346 			break;
347 		}
348 	}
349 
350 zero:
351 	while (pcstack_limit-- > 0)
352 		*pcstack++ = 0;
353 }
354 
355 /*ARGSUSED*/
356 uint64_t
357 dtrace_getarg(int arg, int aframes)
358 {
359 	uintptr_t val;
360 	struct amd64_frame *fp = (struct amd64_frame *)dtrace_getfp();
361 	uintptr_t *stack;
362 	int i;
363 
364 	/*
365 	 * A total of 6 arguments are passed via registers; any argument with
366 	 * index of 5 or lower is therefore in a register.
367 	 */
368 	int inreg = 5;
369 
370 	for (i = 1; i <= aframes; i++) {
371 		fp = fp->f_frame;
372 
373 		if (P2ROUNDUP(fp->f_retaddr, 16) ==
374 		    (long)dtrace_invop_callsite) {
375 			/*
376 			 * In the case of amd64, we will use the pointer to the
377 			 * regs structure that was pushed when we took the
378 			 * trap.  To get this structure, we must increment
379 			 * beyond the frame structure, and then again beyond
380 			 * the calling RIP stored in dtrace_invop().  If the
381 			 * argument that we're seeking is passed on the stack,
382 			 * we'll pull the true stack pointer out of the saved
383 			 * registers and decrement our argument by the number
384 			 * of arguments passed in registers; if the argument
385 			 * we're seeking is passed in registers, we can just
386 			 * load it directly.
387 			 */
388 			struct trapframe *tf = (struct trapframe *)&fp[1];
389 
390 			if (arg <= inreg) {
391 				switch (arg) {
392 				case 0:
393 					stack = (uintptr_t *)&tf->tf_rdi;
394 					break;
395 				case 1:
396 					stack = (uintptr_t *)&tf->tf_rsi;
397 					break;
398 				case 2:
399 					stack = (uintptr_t *)&tf->tf_rdx;
400 					break;
401 				case 3:
402 					stack = (uintptr_t *)&tf->tf_rcx;
403 					break;
404 				case 4:
405 					stack = (uintptr_t *)&tf->tf_r8;
406 					break;
407 				case 5:
408 					stack = (uintptr_t *)&tf->tf_r9;
409 					break;
410 				}
411 				arg = 0;
412 			} else {
413 				stack = (uintptr_t *)(tf->tf_rsp);
414 				arg -= inreg;
415 			}
416 			goto load;
417 		}
418 
419 	}
420 
421 	/*
422 	 * We know that we did not come through a trap to get into
423 	 * dtrace_probe() -- the provider simply called dtrace_probe()
424 	 * directly.  As this is the case, we need to shift the argument
425 	 * that we're looking for:  the probe ID is the first argument to
426 	 * dtrace_probe(), so the argument n will actually be found where
427 	 * one would expect to find argument (n + 1).
428 	 */
429 	arg++;
430 
431 	if (arg <= inreg) {
432 		/*
433 		 * This shouldn't happen.  If the argument is passed in a
434 		 * register then it should have been, well, passed in a
435 		 * register...
436 		 */
437 		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
438 		return (0);
439 	}
440 
441 	arg -= (inreg + 1);
442 	stack = (uintptr_t *)&fp[1];
443 
444 load:
445 	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
446 	val = stack[arg];
447 	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
448 
449 	return (val);
450 }
451 
452 int
453 dtrace_getstackdepth(int aframes)
454 {
455 	int depth = 0;
456 	struct amd64_frame *frame;
457 	vm_offset_t rbp;
458 
459 	aframes++;
460 	rbp = dtrace_getfp();
461 	frame = (struct amd64_frame *)rbp;
462 	depth++;
463 	for(;;) {
464 		if (!INKERNEL((long) frame))
465 			break;
466 		if (!INKERNEL((long) frame->f_frame))
467 			break;
468 		depth++;
469 		if (frame->f_frame <= frame ||
470 		    (vm_offset_t)frame->f_frame >= curthread->td_kstack +
471 		    curthread->td_kstack_pages * PAGE_SIZE)
472 			break;
473 		frame = frame->f_frame;
474 	}
475 	if (depth < aframes)
476 		return 0;
477 	else
478 		return depth - aframes;
479 }
480 
481 ulong_t
482 dtrace_getreg(struct trapframe *rp, uint_t reg)
483 {
484 	/* This table is dependent on reg.d. */
485 	int regmap[] = {
486 		REG_GS,		/* 0  GS */
487 		REG_FS,		/* 1  FS */
488 		REG_ES,		/* 2  ES */
489 		REG_DS,		/* 3  DS */
490 		REG_RDI,	/* 4  EDI */
491 		REG_RSI,	/* 5  ESI */
492 		REG_RBP,	/* 6  EBP, REG_FP */
493 		REG_RSP,	/* 7  ESP */
494 		REG_RBX,	/* 8  EBX, REG_R1 */
495 		REG_RDX,	/* 9  EDX */
496 		REG_RCX,	/* 10 ECX */
497 		REG_RAX,	/* 11 EAX, REG_R0 */
498 		REG_TRAPNO,	/* 12 TRAPNO */
499 		REG_ERR,	/* 13 ERR */
500 		REG_RIP,	/* 14 EIP, REG_PC */
501 		REG_CS,		/* 15 CS */
502 		REG_RFL,	/* 16 EFL, REG_PS */
503 		REG_RSP,	/* 17 UESP, REG_SP */
504 		REG_SS		/* 18 SS */
505 	};
506 
507 	if (reg <= SS) {
508 		if (reg >= sizeof (regmap) / sizeof (int)) {
509 			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
510 			return (0);
511 		}
512 
513 		reg = regmap[reg];
514 	} else {
515 		/* This is dependent on reg.d. */
516 		reg -= SS + 1;
517 	}
518 
519 	switch (reg) {
520 	case REG_RDI:
521 		return (rp->tf_rdi);
522 	case REG_RSI:
523 		return (rp->tf_rsi);
524 	case REG_RDX:
525 		return (rp->tf_rdx);
526 	case REG_RCX:
527 		return (rp->tf_rcx);
528 	case REG_R8:
529 		return (rp->tf_r8);
530 	case REG_R9:
531 		return (rp->tf_r9);
532 	case REG_RAX:
533 		return (rp->tf_rax);
534 	case REG_RBX:
535 		return (rp->tf_rbx);
536 	case REG_RBP:
537 		return (rp->tf_rbp);
538 	case REG_R10:
539 		return (rp->tf_r10);
540 	case REG_R11:
541 		return (rp->tf_r11);
542 	case REG_R12:
543 		return (rp->tf_r12);
544 	case REG_R13:
545 		return (rp->tf_r13);
546 	case REG_R14:
547 		return (rp->tf_r14);
548 	case REG_R15:
549 		return (rp->tf_r15);
550 	case REG_DS:
551 		return (rp->tf_ds);
552 	case REG_ES:
553 		return (rp->tf_es);
554 	case REG_FS:
555 		return (rp->tf_fs);
556 	case REG_GS:
557 		return (rp->tf_gs);
558 	case REG_TRAPNO:
559 		return (rp->tf_trapno);
560 	case REG_ERR:
561 		return (rp->tf_err);
562 	case REG_RIP:
563 		return (rp->tf_rip);
564 	case REG_CS:
565 		return (rp->tf_cs);
566 	case REG_SS:
567 		return (rp->tf_ss);
568 	case REG_RFL:
569 		return (rp->tf_rflags);
570 	case REG_RSP:
571 		return (rp->tf_rsp);
572 	default:
573 		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
574 		return (0);
575 	}
576 }
577 
578 static int
579 dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size)
580 {
581 	ASSERT(INKERNEL(kaddr) && kaddr + size >= kaddr);
582 
583 	if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) {
584 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
585 		cpu_core[curcpu].cpuc_dtrace_illval = uaddr;
586 		return (0);
587 	}
588 
589 	return (1);
590 }
591 
/*
 * Copy `size' bytes from user address `uaddr' to kernel address `kaddr'.
 * Nothing is copied if dtrace_copycheck() rejects the user range.  The
 * `flags' argument is not used here.
 */
void
dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copy(uaddr, kaddr, size);
}
599 
/*
 * Copy `size' bytes from kernel address `kaddr' to user address `uaddr'.
 * Nothing is copied if dtrace_copycheck() rejects the user range.  The
 * `flags' argument is not used here.
 */
void
dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copy(kaddr, uaddr, size);
}
607 
/*
 * String variant of dtrace_copyin():  hands the user source `uaddr', the
 * kernel destination `kaddr', `size' and `flags' to dtrace_copystr().
 * Nothing is copied if dtrace_copycheck() rejects the user range.
 */
void
dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copystr(uaddr, kaddr, size, flags);
}
615 
/*
 * String variant of dtrace_copyout():  hands the kernel source `kaddr',
 * the user destination `uaddr', `size' and `flags' to dtrace_copystr().
 * Nothing is copied if dtrace_copycheck() rejects the user range.
 */
void
dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copystr(kaddr, uaddr, size, flags);
}
623 
624 uint8_t
625 dtrace_fuword8(void *uaddr)
626 {
627 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
628 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
629 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
630 		return (0);
631 	}
632 	return (dtrace_fuword8_nocheck(uaddr));
633 }
634 
635 uint16_t
636 dtrace_fuword16(void *uaddr)
637 {
638 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
639 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
640 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
641 		return (0);
642 	}
643 	return (dtrace_fuword16_nocheck(uaddr));
644 }
645 
646 uint32_t
647 dtrace_fuword32(void *uaddr)
648 {
649 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
650 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
651 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
652 		return (0);
653 	}
654 	return (dtrace_fuword32_nocheck(uaddr));
655 }
656 
657 uint64_t
658 dtrace_fuword64(void *uaddr)
659 {
660 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
661 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
662 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
663 		return (0);
664 	}
665 	return (dtrace_fuword64_nocheck(uaddr));
666 }
667