xref: /freebsd/sys/cddl/dev/dtrace/amd64/dtrace_asm.S (revision d0b2dbfa0ecf2bbc9709efc5e20baf8e4b44bbbf)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Portions Copyright 2008 John Birrell <jb@freebsd.org>
 *
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#define _ASM

#include <machine/asmacros.h>
#include <sys/cpuvar_defs.h>
#include <sys/dtrace.h>

#include "assym.inc"

#define INTR_POP				\
	movq	TF_RDI(%rsp),%rdi;		\
	movq	TF_RSI(%rsp),%rsi;		\
	movq	TF_RDX(%rsp),%rdx;		\
	movq	TF_RCX(%rsp),%rcx;		\
	movq	TF_R8(%rsp),%r8;		\
	movq	TF_R9(%rsp),%r9;		\
	movq	TF_RAX(%rsp),%rax;		\
	movq	TF_RBX(%rsp),%rbx;		\
	movq	TF_RBP(%rsp),%rbp;		\
	movq	TF_R10(%rsp),%r10;		\
	movq	TF_R11(%rsp),%r11;		\
	movq	TF_R12(%rsp),%r12;		\
	movq	TF_R13(%rsp),%r13;		\
	movq	TF_R14(%rsp),%r14;		\
	movq	TF_R15(%rsp),%r15;		\
	testb	$SEL_RPL_MASK,TF_CS(%rsp);	\
	jz	1f;				\
	cli;					\
	swapgs;					\
1:	addq	$TF_RIP,%rsp;

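/*
 * INTR_POP unwinds the interrupt trap frame: it restores the
 * general-purpose registers from the 'struct trapframe' at %rsp, and,
 * if the trap came from user mode (the RPL bits of the saved %cs are
 * non-zero), disables interrupts and executes swapgs to switch back to
 * the user %gs base.  It then advances %rsp past the software-saved
 * registers so that only the hardware-saved words (RIP, CS, RFLAGS,
 * RSP, SS) remain, ready for the iretq that ends each emulation path.
 */
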
.globl dtrace_invop_callsite
.type dtrace_invop_callsite,@function

	ENTRY(dtrace_invop_start)

	/*
	 * #BP traps with %rip set to the next address. We need to decrement
	 * the value to indicate the address of the int3 (0xcc) instruction
	 * that we substituted.
	 */
	movq	TF_RIP(%rsp), %rdi
	decq	%rdi
	movq	%rsp, %rsi

	/*
	 * Allocate some scratch space to let the invop handler return a value.
	 * This is needed when emulating "call" instructions.
	 */
	subq	$16, %rsp
	movq	%rsp, %rdx

	call	dtrace_invop
dtrace_invop_callsite:
	addq	$16, %rsp

	cmpl	$DTRACE_INVOP_PUSHL_EBP, %eax
	je	bp_push
	cmpl	$DTRACE_INVOP_CALL, %eax
	je	bp_call
	cmpl	$DTRACE_INVOP_LEAVE, %eax
	je	bp_leave
	cmpl	$DTRACE_INVOP_NOP, %eax
	je	bp_nop
	cmpl	$DTRACE_INVOP_RET, %eax
	je	bp_ret

	/* When all else fails handle the trap in the usual way. */
	jmpq	*dtrace_invop_calltrap_addr

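/*
 * In rough C terms the dispatch above is (sketch only; names are
 * illustrative, and "scratch" is the 16-byte area allocated around the
 * call):
 *
 *	switch (dtrace_invop(tf->tf_rip - 1, tf, scratch)) {
 *	case DTRACE_INVOP_PUSHL_EBP:	goto bp_push;
 *	case DTRACE_INVOP_CALL:		goto bp_call;
 *	case DTRACE_INVOP_LEAVE:	goto bp_leave;
 *	case DTRACE_INVOP_NOP:		goto bp_nop;
 *	case DTRACE_INVOP_RET:		goto bp_ret;
 *	default:	goto *dtrace_invop_calltrap_addr;  -- not ours
 *	}
 */
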
bp_push:
	/*
	 * We must emulate a "pushq %rbp".  To do this, we pull the stack
	 * down 8 bytes, and then store the base pointer.
	 */
	INTR_POP
	subq	$16, %rsp		/* make room for %rbp */
	pushq	%rax			/* push temp */
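	/*
	 * Stack layout while the copies below execute (offsets from %rsp);
	 * the five hardware-saved words are rebuilt 16 bytes lower, with
	 * the saved RSP decremented by 8 to account for the emulated push:
	 *
	 *	 0(%rsp)	saved %rax (temp)
	 *	 8(%rsp)	new RIP		<- copied from 24(%rsp)
	 *	16(%rsp)	new CS		<- copied from 32(%rsp)
	 *	24(%rsp)	new RFLAGS	<- copied from 40(%rsp)
	 *	32(%rsp)	new RSP		<- calling RSP minus 8
	 *	40(%rsp)	new SS		<- copied from 56(%rsp)
	 */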
	movq	24(%rsp), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	movq	32(%rsp), %rax		/* load calling CS */
	movq	%rax, 16(%rsp)		/* store calling CS */
	movq	40(%rsp), %rax		/* load calling RFLAGS */
	movq	%rax, 24(%rsp)		/* store calling RFLAGS */
	movq	48(%rsp), %rax		/* load calling RSP */
	subq	$8, %rax		/* make room for %rbp */
	movq	%rax, 32(%rsp)		/* store calling RSP */
	movq	56(%rsp), %rax		/* load calling SS */
	movq	%rax, 40(%rsp)		/* store calling SS */
	movq	32(%rsp), %rax		/* reload calling RSP */
	movq	%rbp, (%rax)		/* store %rbp there */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_call:
	/*
	 * Emulate a "call" instruction.  The invop handler must have already
	 * updated the saved copy of %rip in the register set.  It's our job to
	 * pull the hardware-saved registers down to make space for the return
	 * address, which is provided by the invop handler in our scratch
	 * space.
	 */
	INTR_POP
	subq	$16, %rsp		/* make room to pull the frame down */
	pushq	%rax			/* push temp */
	pushq	%rbx			/* push temp */

	movq	32(%rsp), %rax		/* load calling RIP */
	movq	%rax, 16(%rsp)		/* store calling RIP */
	movq	40(%rsp), %rax		/* load calling CS */
	movq	%rax, 24(%rsp)		/* store calling CS */
	movq	48(%rsp), %rax		/* load calling RFLAGS */
	movq	%rax, 32(%rsp)		/* store calling RFLAGS */
	movq	56(%rsp), %rax		/* load calling RSP */
	subq	$8, %rax		/* make room for return address */
	movq	%rax, 40(%rsp)		/* store calling RSP */
	movq	64(%rsp), %rax		/* load calling SS */
	movq	%rax, 48(%rsp)		/* store calling SS */

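	/*
	 * The scratch area passed to dtrace_invop() sat 16 bytes below the
	 * trap frame.  INTR_POP left %rsp at (frame + TF_RIP), and 32 more
	 * bytes have been pushed since, so the return address stored there
	 * by the handler is now at -(TF_RIP - 16)(%rsp).
	 */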
	movq	-(TF_RIP - 16)(%rsp), %rax /* load return address */
	movq	40(%rsp), %rbx		/* reload calling RSP */
	movq	%rax, (%rbx)		/* store return address */

	popq	%rbx			/* pop temp */
	popq	%rax			/* pop temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_leave:
	/*
	 * We must emulate a "leave", which is the same as a "movq %rbp, %rsp"
	 * followed by a "popq %rbp".  This is quite a bit simpler on amd64
	 * than it is on i386 -- we can exploit the fact that the %rsp is
	 * explicitly saved to effect the pop without having to reshuffle
	 * the other data pushed for the trap.
	 */
	INTR_POP
	pushq	%rax			/* push temp */
	movq	8(%rsp), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	movq	(%rbp), %rax		/* get new %rbp */
	addq	$8, %rbp		/* adjust new %rsp */
	movq	%rbp, 32(%rsp)		/* store new %rsp */
	movq	%rax, %rbp		/* set new %rbp */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_nop:
	/* We must emulate a "nop". */
	INTR_POP
	iretq
	/*NOTREACHED*/

bp_ret:
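	/*
	 * Emulate a "ret": fetch the return address from the interrupted
	 * thread's stack into the saved RIP slot, then advance the saved
	 * %rsp by 8 to consume it.
	 */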
	INTR_POP
	pushq	%rax			/* push temp */
	movq	32(%rsp), %rax		/* load calling RSP */
	movq	(%rax), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	addq	$8, 32(%rsp)		/* adjust new %rsp */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

	END(dtrace_invop_start)

/*
greg_t dtrace_getfp(void)
*/
	ENTRY(dtrace_getfp)
	movq	%rbp, %rax
	ret
	END(dtrace_getfp)

/*
uint32_t
dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new)
*/
	ENTRY(dtrace_cas32)
	movl	%esi, %eax
	lock
	cmpxchgl %edx, (%rdi)
	ret
	END(dtrace_cas32)

/*
void *
dtrace_casptr(void *target, void *cmp, void *new)
*/
	ENTRY(dtrace_casptr)
	movq	%rsi, %rax
	lock
	cmpxchgq %rdx, (%rdi)
	ret
	END(dtrace_casptr)

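/*
 * Both routines follow the compare-and-swap contract: the old value of
 * *target is returned, and the store happens only if it equalled cmp.
 * A hypothetical C rendering using compiler builtins (sketch only, not
 * the code that runs):
 *
 *	uint32_t
 *	dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new)
 *	{
 *		__atomic_compare_exchange_n(target, &cmp, new, 0,
 *		    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
 *		return (cmp);	-- old value, as in %eax after cmpxchg
 *	}
 */
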
/*
uintptr_t
dtrace_caller(int aframes)
*/
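/*
 * Always returns -1: on this architecture the caller cannot be derived
 * here, and the framework falls back to taking a stack trace when it
 * sees the -1 sentinel.
 */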
	ENTRY(dtrace_caller)
	movq	$-1, %rax
	ret
	END(dtrace_caller)

/*
void
dtrace_copy(uintptr_t src, uintptr_t dest, size_t size)
*/
	ENTRY(dtrace_copy_nosmap)
	pushq	%rbp
	movq	%rsp, %rbp

	xchgq	%rdi, %rsi		/* make %rsi source, %rdi dest */
	movq	%rdx, %rcx		/* load count */
	repz				/* repeat for count ... */
	smovb				/*   move from %ds:rsi to %es:rdi */
	leave
	ret
	END(dtrace_copy_nosmap)

	ENTRY(dtrace_copy_smap)
	pushq	%rbp
	movq	%rsp, %rbp

	xchgq	%rdi, %rsi		/* make %rsi source, %rdi dest */
	movq	%rdx, %rcx		/* load count */
	stac
	repz				/* repeat for count ... */
	smovb				/*   move from %ds:rsi to %es:rdi */
	clac
	leave
	ret
	END(dtrace_copy_smap)

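/*
 * The _smap variants bracket their user accesses with stac/clac: with
 * SMAP (Supervisor Mode Access Prevention) enabled, the kernel may only
 * touch user pages while EFLAGS.AC is set.  Which variant gets called
 * is selected elsewhere based on CPU support.  The copy itself is, in
 * effect (illustrative sketch only):
 *
 *	void
 *	dtrace_copy(uintptr_t src, uintptr_t dest, size_t size)
 *	{
 *		memcpy((void *)dest, (const void *)src, size);
 *	}
 */
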
/*
void
dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
*/
	ENTRY(dtrace_copystr_nosmap)
	pushq	%rbp
	movq	%rsp, %rbp

0:
	movb	(%rdi), %al		/* load from source */
	movb	%al, (%rsi)		/* store to destination */
	addq	$1, %rdi		/* increment source pointer */
	addq	$1, %rsi		/* increment destination pointer */
	subq	$1, %rdx		/* decrement remaining count */
	cmpb	$0, %al
	je	2f
	testq	$0xfff, %rdx		/* test if count is 4k-aligned */
	jnz	1f			/* if not, continue with copying */
	testq	$CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */
	jnz	2f
1:
	cmpq	$0, %rdx
	jne	0b
2:
	leave
	ret

	END(dtrace_copystr_nosmap)

	ENTRY(dtrace_copystr_smap)
	pushq	%rbp
	movq	%rsp, %rbp

	stac
0:
	movb	(%rdi), %al		/* load from source */
	movb	%al, (%rsi)		/* store to destination */
	addq	$1, %rdi		/* increment source pointer */
	addq	$1, %rsi		/* increment destination pointer */
	subq	$1, %rdx		/* decrement remaining count */
	cmpb	$0, %al
	je	2f
	testq	$0xfff, %rdx		/* test if count is 4k-aligned */
	jnz	1f			/* if not, continue with copying */
	testq	$CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */
	jnz	2f
1:
	cmpq	$0, %rdx
	jne	0b
2:
	clac
	leave
	ret

	END(dtrace_copystr_smap)

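/*
 * A C rendering of the loop above (sketch only): copy bytes until the
 * NUL terminator or the size limit is reached, and at every 4 KB
 * boundary of the remaining count check whether a fault handler has
 * flagged a bad address, so a fault in probe context aborts the copy
 * promptly.
 *
 *	void
 *	dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
 *	    volatile uint16_t *flags)
 *	{
 *		const char *src = (const char *)uaddr;
 *		char *dst = (char *)kaddr;
 *
 *		while (size != 0) {
 *			char c = *src++;
 *
 *			*dst++ = c;
 *			size--;
 *			if (c == '\0')
 *				break;
 *			if ((size & 0xfff) == 0 &&
 *			    (*flags & CPU_DTRACE_BADADDR) != 0)
 *				break;
 *		}
 *	}
 */
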
/*
uintptr_t
dtrace_fulword(void *addr)
*/
	ENTRY(dtrace_fulword_nosmap)
	movq	(%rdi), %rax
	ret
	END(dtrace_fulword_nosmap)

	ENTRY(dtrace_fulword_smap)
	stac
	movq	(%rdi), %rax
	clac
	ret
	END(dtrace_fulword_smap)

/*
uint8_t
dtrace_fuword8_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword8_nocheck_nosmap)
	xorq	%rax, %rax
	movb	(%rdi), %al
	ret
	END(dtrace_fuword8_nocheck_nosmap)

	ENTRY(dtrace_fuword8_nocheck_smap)
	stac
	xorq	%rax, %rax
	movb	(%rdi), %al
	clac
	ret
	END(dtrace_fuword8_nocheck_smap)

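/*
 * The xorq in the 8-, 16- and 32-bit fetches clears %rax up front:
 * movb and movw leave the upper bits of the destination register
 * untouched, so without it the return value would carry stale bits.
 * (For movl the zeroing is redundant, since 32-bit writes already
 * clear the upper half, but it is harmless.)
 */
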
/*
uint16_t
dtrace_fuword16_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword16_nocheck_nosmap)
	xorq	%rax, %rax
	movw	(%rdi), %ax
	ret
	END(dtrace_fuword16_nocheck_nosmap)

	ENTRY(dtrace_fuword16_nocheck_smap)
	stac
	xorq	%rax, %rax
	movw	(%rdi), %ax
	clac
	ret
	END(dtrace_fuword16_nocheck_smap)

/*
uint32_t
dtrace_fuword32_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword32_nocheck_nosmap)
	xorq	%rax, %rax
	movl	(%rdi), %eax
	ret
	END(dtrace_fuword32_nocheck_nosmap)

	ENTRY(dtrace_fuword32_nocheck_smap)
	stac
	xorq	%rax, %rax
	movl	(%rdi), %eax
	clac
	ret
	END(dtrace_fuword32_nocheck_smap)

/*
uint64_t
dtrace_fuword64_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword64_nocheck_nosmap)
	movq	(%rdi), %rax
	ret
	END(dtrace_fuword64_nocheck_nosmap)

	ENTRY(dtrace_fuword64_nocheck_smap)
	stac
	movq	(%rdi), %rax
	clac
	ret
	END(dtrace_fuword64_nocheck_smap)

/*
void
dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
    int fault, int fltoffs, uintptr_t illval)
*/
	ENTRY(dtrace_probe_error)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x8, %rsp
	movq	%r9, (%rsp)
	movq	%r8, %r9
	movq	%rcx, %r8
	movq	%rdx, %rcx
	movq	%rsi, %rdx
	movq	%rdi, %rsi
	movl	dtrace_probeid_error(%rip), %edi
	call	dtrace_probe
	addq	$0x8, %rsp
	leave
	ret
	END(dtrace_probe_error)

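/*
 * The register shuffle above prepends the error probe id and shifts the
 * six original arguments right by one slot; dtrace_probe()'s C
 * prototype declares only five probe arguments, so the seventh value
 * (illval) has to be placed in its stack slot by hand.  In effect
 * (sketch only):
 *
 *	dtrace_probe(dtrace_probeid_error, (uintptr_t)state, epid,
 *	    which, fault, fltoffs);	-- plus illval on the stack
 */
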
/*
void
dtrace_membar_producer(void)
*/
	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	END(dtrace_membar_producer)

/*
void
dtrace_membar_consumer(void)
*/
	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	END(dtrace_membar_consumer)

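/*
 * No fence instruction is needed here: the x86 memory model already
 * guarantees store-store ordering (producer) and load-load ordering
 * (consumer), so an ordinary function call suffices as a compiler
 * barrier.  The "rep; ret" merely pads the return to two bytes for
 * branch targets, per the AMD optimization guide cited above.
 */
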
/*
dtrace_icookie_t
dtrace_interrupt_disable(void)
*/
	ENTRY(dtrace_interrupt_disable)
	pushfq
	popq	%rax
	cli
	ret
	END(dtrace_interrupt_disable)

/*
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
*/
	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi
	popfq
	ret
	END(dtrace_interrupt_enable)

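/*
 * The cookie is just the saved %rflags word: disable returns the old
 * flags with interrupts then masked, and enable restores them via
 * popfq.  A hypothetical C rendering with inline assembly (sketch
 * only):
 *
 *	dtrace_icookie_t
 *	dtrace_interrupt_disable(void)
 *	{
 *		dtrace_icookie_t rflags;
 *
 *		__asm __volatile("pushfq; popq %0; cli"
 *		    : "=r" (rflags) : : "memory");
 *		return (rflags);
 *	}
 *
 *	void
 *	dtrace_interrupt_enable(dtrace_icookie_t rflags)
 *	{
 *		__asm __volatile("pushq %0; popfq"
 *		    : : "r" (rflags) : "memory", "cc");
 *	}
 */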