xref: /freebsd/sys/cddl/dev/dtrace/amd64/dtrace_isa.c (revision a4dc509f723944821bcfcc52005ff87c9a5dee5b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  *
22  * $FreeBSD$
23  */
24 /*
25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  */
28 #include <sys/cdefs.h>
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/stack.h>
34 #include <sys/pcpu.h>
35 
36 #include <machine/frame.h>
37 #include <machine/md_var.h>
38 #include <machine/reg.h>
39 #include <machine/stack.h>
40 
41 #include <vm/vm.h>
42 #include <vm/vm_param.h>
43 #include <vm/pmap.h>
44 
45 #include "regset.h"
46 
/*
 * Unchecked user-memory fetch primitives.  They are only declared here;
 * the dtrace_fuword8/16/32/64() wrappers in this file call them after
 * validating the user address.
 */
uint8_t dtrace_fuword8_nocheck(void *);
uint16_t dtrace_fuword16_nocheck(void *);
uint32_t dtrace_fuword32_nocheck(void *);
uint64_t dtrace_fuword64_nocheck(void *);

/*
 * Upper bound on the number of frames dtrace_getustack_common() will walk
 * on a user stack before flagging CPU_DTRACE_BADSTACK and giving up.
 */
int	dtrace_ustackdepth_max = 2048;
53 
54 void
55 dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
56     uint32_t *intrpc)
57 {
58 	int depth = 0;
59 	register_t rbp;
60 	struct amd64_frame *frame;
61 	vm_offset_t callpc;
62 	pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller;
63 
64 	if (intrpc != 0)
65 		pcstack[depth++] = (pc_t) intrpc;
66 
67 	aframes++;
68 
69 	__asm __volatile("movq %%rbp,%0" : "=r" (rbp));
70 
71 	frame = (struct amd64_frame *)rbp;
72 	while (depth < pcstack_limit) {
73 		if (!INKERNEL((long) frame))
74 			break;
75 
76 		callpc = frame->f_retaddr;
77 
78 		if (!INKERNEL(callpc))
79 			break;
80 
81 		if (aframes > 0) {
82 			aframes--;
83 			if ((aframes == 0) && (caller != 0)) {
84 				pcstack[depth++] = caller;
85 			}
86 		}
87 		else {
88 			pcstack[depth++] = callpc;
89 		}
90 
91 		if (frame->f_frame <= frame ||
92 		    (vm_offset_t)frame->f_frame >= curthread->td_kstack +
93 		    curthread->td_kstack_pages * PAGE_SIZE)
94 			break;
95 		frame = frame->f_frame;
96 	}
97 
98 	for (; depth < pcstack_limit; depth++) {
99 		pcstack[depth] = 0;
100 	}
101 }
102 
103 static int
104 dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc,
105     uintptr_t sp)
106 {
107 	uintptr_t oldsp;
108 	volatile uint16_t *flags =
109 	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
110 	int ret = 0;
111 
112 	ASSERT(pcstack == NULL || pcstack_limit > 0);
113 	ASSERT(dtrace_ustackdepth_max > 0);
114 
115 	while (pc != 0) {
116 		/*
117 		 * We limit the number of times we can go around this
118 		 * loop to account for a circular stack.
119 		 */
120 		if (ret++ >= dtrace_ustackdepth_max) {
121 			*flags |= CPU_DTRACE_BADSTACK;
122 			cpu_core[curcpu].cpuc_dtrace_illval = sp;
123 			break;
124 		}
125 
126 		if (pcstack != NULL) {
127 			*pcstack++ = (uint64_t)pc;
128 			pcstack_limit--;
129 			if (pcstack_limit <= 0)
130 				break;
131 		}
132 
133 		if (sp == 0)
134 			break;
135 
136 		oldsp = sp;
137 
138 		pc = dtrace_fuword64((void *)(sp +
139 			offsetof(struct amd64_frame, f_retaddr)));
140 		sp = dtrace_fuword64((void *)sp);
141 
142 		if (sp == oldsp) {
143 			*flags |= CPU_DTRACE_BADSTACK;
144 			cpu_core[curcpu].cpuc_dtrace_illval = sp;
145 			break;
146 		}
147 
148 		/*
149 		 * This is totally bogus:  if we faulted, we're going to clear
150 		 * the fault and break.  This is to deal with the apparently
151 		 * broken Java stacks on x86.
152 		 */
153 		if (*flags & CPU_DTRACE_FAULT) {
154 			*flags &= ~CPU_DTRACE_FAULT;
155 			break;
156 		}
157 	}
158 
159 	return (ret);
160 }
161 
162 void
163 dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
164 {
165 	proc_t *p = curproc;
166 	struct trapframe *tf;
167 	uintptr_t pc, sp, fp;
168 	volatile uint16_t *flags =
169 	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
170 	int n;
171 
172 	if (*flags & CPU_DTRACE_FAULT)
173 		return;
174 
175 	if (pcstack_limit <= 0)
176 		return;
177 
178 	/*
179 	 * If there's no user context we still need to zero the stack.
180 	 */
181 	if (p == NULL || (tf = curthread->td_frame) == NULL)
182 		goto zero;
183 
184 	*pcstack++ = (uint64_t)p->p_pid;
185 	pcstack_limit--;
186 
187 	if (pcstack_limit <= 0)
188 		return;
189 
190 	pc = tf->tf_rip;
191 	fp = tf->tf_rbp;
192 	sp = tf->tf_rsp;
193 
194 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
195 		/*
196 		 * In an entry probe.  The frame pointer has not yet been
197 		 * pushed (that happens in the function prologue).  The
198 		 * best approach is to add the current pc as a missing top
199 		 * of stack and back the pc up to the caller, which is stored
200 		 * at the current stack pointer address since the call
201 		 * instruction puts it there right before the branch.
202 		 */
203 
204 		*pcstack++ = (uint64_t)pc;
205 		pcstack_limit--;
206 		if (pcstack_limit <= 0)
207 			return;
208 
209 		pc = dtrace_fuword64((void *) sp);
210 	}
211 
212 	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
213 	ASSERT(n >= 0);
214 	ASSERT(n <= pcstack_limit);
215 
216 	pcstack += n;
217 	pcstack_limit -= n;
218 
219 zero:
220 	while (pcstack_limit-- > 0)
221 		*pcstack++ = 0;
222 }
223 
224 int
225 dtrace_getustackdepth(void)
226 {
227 	proc_t *p = curproc;
228 	struct trapframe *tf;
229 	uintptr_t pc, fp, sp;
230 	int n = 0;
231 
232 	if (p == NULL || (tf = curthread->td_frame) == NULL)
233 		return (0);
234 
235 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
236 		return (-1);
237 
238 	pc = tf->tf_rip;
239 	fp = tf->tf_rbp;
240 	sp = tf->tf_rsp;
241 
242 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
243 		/*
244 		 * In an entry probe.  The frame pointer has not yet been
245 		 * pushed (that happens in the function prologue).  The
246 		 * best approach is to add the current pc as a missing top
247 		 * of stack and back the pc up to the caller, which is stored
248 		 * at the current stack pointer address since the call
249 		 * instruction puts it there right before the branch.
250 		 */
251 
252 		pc = dtrace_fuword64((void *) sp);
253 		n++;
254 	}
255 
256 	n += dtrace_getustack_common(NULL, 0, pc, fp);
257 
258 	return (n);
259 }
260 
/*
 * Capture the user-level call stack together with the frame pointer that
 * belongs to each recorded pc.
 *
 * pcstack        receives the pid of the current process followed by the
 *                user pcs; unused trailing slots are zeroed.
 * fpstack        receives one frame pointer per recorded pc.  The pid
 *                store advances pcstack only, so fpstack[i] pairs with
 *                pcstack[i + 1].  fpstack is not zero-filled.
 * pcstack_limit  number of slots in pcstack.
 *
 * Nothing is written when a DTrace fault is already pending on this CPU or
 * when pcstack_limit is not positive.
 */
void
dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
{
	proc_t *p = curproc;
	struct trapframe *tf;
	uintptr_t pc, sp, fp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
#ifdef notyet	/* XXX signal stack */
	uintptr_t oldcontext;
	size_t s1, s2;
#endif

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (p == NULL || (tf = curthread->td_frame) == NULL)
		goto zero;

	/* First pcstack slot is the pid; no fpstack entry is written for it. */
	*pcstack++ = (uint64_t)p->p_pid;
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	pc = tf->tf_rip;
	sp = tf->tf_rsp;
	fp = tf->tf_rbp;

#ifdef notyet /* XXX signal stack */
	oldcontext = lwp->lwp_oldcontext;
	s1 = sizeof (struct xframe) + 2 * sizeof (long);
	s2 = s1 + sizeof (siginfo_t);
#endif

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/*
		 * Entry probe: record the current pc with a frame pointer of
		 * 0, then continue from the return address found at the
		 * stack pointer.
		 */
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = 0;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			return;

		pc = dtrace_fuword64((void *)sp);
	}

	/*
	 * Each iteration stores one entry and decrements pcstack_limit, so
	 * the loop runs at most pcstack_limit times.
	 */
	while (pc != 0) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = fp;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			break;

		if (fp == 0)
			break;

#ifdef notyet /* XXX signal stack */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			ucontext_t *ucp = (ucontext_t *)oldcontext;
			greg_t *gregs = ucp->uc_mcontext.gregs;

			sp = dtrace_fulword(&gregs[REG_FP]);
			pc = dtrace_fulword(&gregs[REG_PC]);

			oldcontext = dtrace_fulword(&ucp->uc_link);
		} else
#endif /* XXX */
		{
			/* Return address first, then the saved frame pointer. */
			pc = dtrace_fuword64((void *)(fp +
				offsetof(struct amd64_frame, f_retaddr)));
			fp = dtrace_fuword64((void *)fp);
		}

		/*
		 * This is totally bogus:  if we faulted, we're going to clear
		 * the fault and break.  This is to deal with the apparently
		 * broken Java stacks on x86.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

zero:
	/* Only pcstack is cleared here; fpstack is left as is. */
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
354 
355 /*ARGSUSED*/
356 uint64_t
357 dtrace_getarg(int arg, int aframes)
358 {
359 	uintptr_t val;
360 	struct amd64_frame *fp = (struct amd64_frame *)dtrace_getfp();
361 	uintptr_t *stack;
362 	int i;
363 
364 	/*
365 	 * A total of 6 arguments are passed via registers; any argument with
366 	 * index of 5 or lower is therefore in a register.
367 	 */
368 	int inreg = 5;
369 
370 	for (i = 1; i <= aframes; i++) {
371 		fp = fp->f_frame;
372 
373 		if (P2ROUNDUP(fp->f_retaddr, 16) ==
374 		    (long)dtrace_invop_callsite) {
375 			/*
376 			 * In the case of amd64, we will use the pointer to the
377 			 * regs structure that was pushed when we took the
378 			 * trap.  To get this structure, we must increment
379 			 * beyond the frame structure, and then again beyond
380 			 * the calling RIP stored in dtrace_invop().  If the
381 			 * argument that we're seeking is passed on the stack,
382 			 * we'll pull the true stack pointer out of the saved
383 			 * registers and decrement our argument by the number
384 			 * of arguments passed in registers; if the argument
385 			 * we're seeking is passed in regsiters, we can just
386 			 * load it directly.
387 			 */
388 			struct trapframe *tf =
389 			    (struct trapframe *)((uintptr_t)&fp[1]);
390 
391 			if (arg <= inreg) {
392 				switch (arg) {
393 				case 0:
394 					stack = (uintptr_t *)&tf->tf_rdi;
395 					break;
396 				case 1:
397 					stack = (uintptr_t *)&tf->tf_rsi;
398 					break;
399 				case 2:
400 					stack = (uintptr_t *)&tf->tf_rdx;
401 					break;
402 				case 3:
403 					stack = (uintptr_t *)&tf->tf_rcx;
404 					break;
405 				case 4:
406 					stack = (uintptr_t *)&tf->tf_r8;
407 					break;
408 				case 5:
409 					stack = (uintptr_t *)&tf->tf_r9;
410 					break;
411 				}
412 				arg = 0;
413 			} else {
414 				stack = (uintptr_t *)(tf->tf_rsp);
415 				arg -= inreg;
416 			}
417 			goto load;
418 		}
419 
420 	}
421 
422 	/*
423 	 * We know that we did not come through a trap to get into
424 	 * dtrace_probe() -- the provider simply called dtrace_probe()
425 	 * directly.  As this is the case, we need to shift the argument
426 	 * that we're looking for:  the probe ID is the first argument to
427 	 * dtrace_probe(), so the argument n will actually be found where
428 	 * one would expect to find argument (n + 1).
429 	 */
430 	arg++;
431 
432 	if (arg <= inreg) {
433 		/*
434 		 * This shouldn't happen.  If the argument is passed in a
435 		 * register then it should have been, well, passed in a
436 		 * register...
437 		 */
438 		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
439 		return (0);
440 	}
441 
442 	arg -= (inreg + 1);
443 	stack = (uintptr_t *)&fp[1];
444 
445 load:
446 	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
447 	val = stack[arg];
448 	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
449 
450 	return (val);
451 }
452 
453 int
454 dtrace_getstackdepth(int aframes)
455 {
456 	int depth = 0;
457 	struct amd64_frame *frame;
458 	vm_offset_t rbp;
459 
460 	aframes++;
461 	rbp = dtrace_getfp();
462 	frame = (struct amd64_frame *)rbp;
463 	depth++;
464 	for(;;) {
465 		if (!INKERNEL((long) frame))
466 			break;
467 		if (!INKERNEL((long) frame->f_frame))
468 			break;
469 		depth++;
470 		if (frame->f_frame <= frame ||
471 		    (vm_offset_t)frame->f_frame >= curthread->td_kstack +
472 		    curthread->td_kstack_pages * PAGE_SIZE)
473 			break;
474 		frame = frame->f_frame;
475 	}
476 	if (depth < aframes)
477 		return 0;
478 	else
479 		return depth - aframes;
480 }
481 
482 ulong_t
483 dtrace_getreg(struct trapframe *rp, uint_t reg)
484 {
485 	/* This table is dependent on reg.d. */
486 	int regmap[] = {
487 		REG_GS,		/* 0  GS */
488 		REG_FS,		/* 1  FS */
489 		REG_ES,		/* 2  ES */
490 		REG_DS,		/* 3  DS */
491 		REG_RDI,	/* 4  EDI */
492 		REG_RSI,	/* 5  ESI */
493 		REG_RBP,	/* 6  EBP, REG_FP */
494 		REG_RSP,	/* 7  ESP */
495 		REG_RBX,	/* 8  EBX, REG_R1 */
496 		REG_RDX,	/* 9  EDX */
497 		REG_RCX,	/* 10 ECX */
498 		REG_RAX,	/* 11 EAX, REG_R0 */
499 		REG_TRAPNO,	/* 12 TRAPNO */
500 		REG_ERR,	/* 13 ERR */
501 		REG_RIP,	/* 14 EIP, REG_PC */
502 		REG_CS,		/* 15 CS */
503 		REG_RFL,	/* 16 EFL, REG_PS */
504 		REG_RSP,	/* 17 UESP, REG_SP */
505 		REG_SS		/* 18 SS */
506 	};
507 
508 	if (reg <= SS) {
509 		if (reg >= sizeof (regmap) / sizeof (int)) {
510 			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
511 			return (0);
512 		}
513 
514 		reg = regmap[reg];
515 	} else {
516 		/* This is dependent on reg.d. */
517 		reg -= SS + 1;
518 	}
519 
520 	switch (reg) {
521 	case REG_RDI:
522 		return (rp->tf_rdi);
523 	case REG_RSI:
524 		return (rp->tf_rsi);
525 	case REG_RDX:
526 		return (rp->tf_rdx);
527 	case REG_RCX:
528 		return (rp->tf_rcx);
529 	case REG_R8:
530 		return (rp->tf_r8);
531 	case REG_R9:
532 		return (rp->tf_r9);
533 	case REG_RAX:
534 		return (rp->tf_rax);
535 	case REG_RBX:
536 		return (rp->tf_rbx);
537 	case REG_RBP:
538 		return (rp->tf_rbp);
539 	case REG_R10:
540 		return (rp->tf_r10);
541 	case REG_R11:
542 		return (rp->tf_r11);
543 	case REG_R12:
544 		return (rp->tf_r12);
545 	case REG_R13:
546 		return (rp->tf_r13);
547 	case REG_R14:
548 		return (rp->tf_r14);
549 	case REG_R15:
550 		return (rp->tf_r15);
551 	case REG_DS:
552 		return (rp->tf_ds);
553 	case REG_ES:
554 		return (rp->tf_es);
555 	case REG_FS:
556 		return (rp->tf_fs);
557 	case REG_GS:
558 		return (rp->tf_gs);
559 	case REG_TRAPNO:
560 		return (rp->tf_trapno);
561 	case REG_ERR:
562 		return (rp->tf_err);
563 	case REG_RIP:
564 		return (rp->tf_rip);
565 	case REG_CS:
566 		return (rp->tf_cs);
567 	case REG_SS:
568 		return (rp->tf_ss);
569 	case REG_RFL:
570 		return (rp->tf_rflags);
571 	case REG_RSP:
572 		return (rp->tf_rsp);
573 	default:
574 		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
575 		return (0);
576 	}
577 }
578 
579 static int
580 dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size)
581 {
582 	ASSERT(INKERNEL(kaddr) && kaddr + size >= kaddr);
583 
584 	if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) {
585 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
586 		cpu_core[curcpu].cpuc_dtrace_illval = uaddr;
587 		return (0);
588 	}
589 
590 	return (1);
591 }
592 
/*
 * Copy `size` bytes from user address `uaddr` to kernel address `kaddr`.
 * Nothing is copied if dtrace_copycheck() rejects the user range.  The
 * `flags` argument is not used here.
 */
void
dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copy(uaddr, kaddr, size);
}
600 
/*
 * Copy `size` bytes from kernel address `kaddr` to user address `uaddr`.
 * Nothing is copied if dtrace_copycheck() rejects the user range.  The
 * `flags` argument is not used here.
 */
void
dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copy(kaddr, uaddr, size);
}
608 
/*
 * Copy a string of at most `size` bytes from user address `uaddr` to
 * kernel address `kaddr` via dtrace_copystr(), passing `flags` through.
 * Nothing is copied if dtrace_copycheck() rejects the user range.
 */
void
dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copystr(uaddr, kaddr, size, flags);
}
616 
/*
 * Copy a string of at most `size` bytes from kernel address `kaddr` to
 * user address `uaddr` via dtrace_copystr(), passing `flags` through.
 * Nothing is copied if dtrace_copycheck() rejects the user range.
 */
void
dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copystr(kaddr, uaddr, size, flags);
}
624 
625 uint8_t
626 dtrace_fuword8(void *uaddr)
627 {
628 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
629 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
630 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
631 		return (0);
632 	}
633 	return (dtrace_fuword8_nocheck(uaddr));
634 }
635 
636 uint16_t
637 dtrace_fuword16(void *uaddr)
638 {
639 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
640 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
641 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
642 		return (0);
643 	}
644 	return (dtrace_fuword16_nocheck(uaddr));
645 }
646 
647 uint32_t
648 dtrace_fuword32(void *uaddr)
649 {
650 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
651 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
652 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
653 		return (0);
654 	}
655 	return (dtrace_fuword32_nocheck(uaddr));
656 }
657 
658 uint64_t
659 dtrace_fuword64(void *uaddr)
660 {
661 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
662 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
663 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
664 		return (0);
665 	}
666 	return (dtrace_fuword64_nocheck(uaddr));
667 }
668