/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Portions Copyright 2008 John Birrell <jb@freebsd.org>
 *
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#define _ASM

#include <machine/asmacros.h>
#include <sys/cpuvar_defs.h>
#include <sys/dtrace.h>

#include "assym.inc"

#define INTR_POP				\
	movq	TF_RDI(%rsp),%rdi;		\
	movq	TF_RSI(%rsp),%rsi;		\
	movq	TF_RDX(%rsp),%rdx;		\
	movq	TF_RCX(%rsp),%rcx;		\
	movq	TF_R8(%rsp),%r8;		\
	movq	TF_R9(%rsp),%r9;		\
	movq	TF_RAX(%rsp),%rax;		\
	movq	TF_RBX(%rsp),%rbx;		\
	movq	TF_RBP(%rsp),%rbp;		\
	movq	TF_R10(%rsp),%r10;		\
	movq	TF_R11(%rsp),%r11;		\
	movq	TF_R12(%rsp),%r12;		\
	movq	TF_R13(%rsp),%r13;		\
	movq	TF_R14(%rsp),%r14;		\
	movq	TF_R15(%rsp),%r15;		\
	testb	$SEL_RPL_MASK,TF_CS(%rsp);	\
	jz	1f;				\
	cli;					\
	swapgs;					\
1:	addq	$TF_RIP,%rsp;
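/*
 * After INTR_POP the saved general-purpose registers have been restored
 * and, for traps taken from user mode (non-zero RPL in the saved %cs),
 * the user gsbase has been swapped back in.  %rsp is left pointing at
 * the hardware iretq frame: RIP at 0, CS at 8, RFLAGS at 16, RSP at 24
 * and SS at 32.  The offsets in the emulation code below are relative
 * to this layout.
 */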

	ENTRY(dtrace_invop_start)

	/*
	 * #BP traps with %rip set to the next address. We need to decrement
	 * the value to indicate the address of the int3 (0xcc) instruction
	 * that we substituted.
	 */
	movq	TF_RIP(%rsp), %rdi
	decq	%rdi
	movq	%rsp, %rsi

	/*
	 * Allocate some scratch space to let the invop handler return a value.
	 * This is needed when emulating "call" instructions.
	 */
	subq	$16, %rsp
	movq	%rsp, %rdx

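	/*
	 * The arguments are already in place per the SysV AMD64 ABI: the
	 * handler is invoked as dtrace_invop(rip, frame, scratch) with
	 * %rdi, %rsi and %rdx loaded above.
	 */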
	call	dtrace_invop
	addq	$16, %rsp

	cmpl	$DTRACE_INVOP_PUSHL_EBP, %eax
	je	bp_push
	cmpl	$DTRACE_INVOP_CALL, %eax
	je	bp_call
	cmpl	$DTRACE_INVOP_LEAVE, %eax
	je	bp_leave
	cmpl	$DTRACE_INVOP_NOP, %eax
	je	bp_nop
	cmpl	$DTRACE_INVOP_RET, %eax
	je	bp_ret

	/* When all else fails, handle the trap in the usual way. */
	jmpq	*dtrace_invop_calltrap_addr

bp_push:
	/*
	 * We must emulate a "pushq %rbp".  To do this, we pull the stack
	 * down 8 bytes, and then store the base pointer.
	 */
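	/*
	 * After INTR_POP, the "subq" and the temp push below, the offsets
	 * from %rsp are: 0 saved %rax, 8-16 free, 24 RIP, 32 CS, 40 RFLAGS,
	 * 48 RSP, 56 SS.  The iretq frame is copied down 16 bytes and the
	 * interrupted RSP is lowered by 8 to make the slot that holds %rbp.
	 */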
	INTR_POP
	subq	$16, %rsp		/* make room for %rbp */
	pushq	%rax			/* push temp */
	movq	24(%rsp), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	movq	32(%rsp), %rax		/* load calling CS */
	movq	%rax, 16(%rsp)		/* store calling CS */
	movq	40(%rsp), %rax		/* load calling RFLAGS */
	movq	%rax, 24(%rsp)		/* store calling RFLAGS */
	movq	48(%rsp), %rax		/* load calling RSP */
	subq	$8, %rax		/* make room for %rbp */
	movq	%rax, 32(%rsp)		/* store calling RSP */
	movq	56(%rsp), %rax		/* load calling SS */
	movq	%rax, 40(%rsp)		/* store calling SS */
	movq	32(%rsp), %rax		/* reload calling RSP */
	movq	%rbp, (%rax)		/* store %rbp there */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_call:
	/*
	 * Emulate a "call" instruction.  The invop handler must have already
	 * updated the saved copy of %rip in the register set.  It's our job to
	 * pull the hardware-saved registers down to make space for the return
	 * address, which is provided by the invop handler in our scratch
	 * space.
	 */
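	/*
	 * After INTR_POP, the "subq" and the two temp pushes below, the
	 * offsets from %rsp are: 0 saved %rbx, 8 saved %rax, 16-24 free,
	 * 32 RIP, 40 CS, 48 RFLAGS, 56 RSP, 64 SS.  The iretq frame is
	 * copied down 16 bytes and the interrupted RSP is lowered by 8 to
	 * make the slot that holds the return address.
	 */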
	INTR_POP
	subq	$16, %rsp		/* make room for the return address */
	pushq	%rax			/* push temp */
	pushq	%rbx			/* push temp */

	movq	32(%rsp), %rax		/* load calling RIP */
	movq	%rax, 16(%rsp)		/* store calling RIP */
	movq	40(%rsp), %rax		/* load calling CS */
	movq	%rax, 24(%rsp)		/* store calling CS */
	movq	48(%rsp), %rax		/* load calling RFLAGS */
	movq	%rax, 32(%rsp)		/* store calling RFLAGS */
	movq	56(%rsp), %rax		/* load calling RSP */
	subq	$8, %rax		/* make room for return address */
	movq	%rax, 40(%rsp)		/* store calling RSP */
	movq	64(%rsp), %rax		/* load calling SS */
	movq	%rax, 48(%rsp)		/* store calling SS */

	movq	-(TF_RIP - 16)(%rsp), %rax /* load return address */
	movq	40(%rsp), %rbx		/* reload calling RSP */
	movq	%rax, (%rbx)		/* store return address */

	popq	%rbx			/* pop temp */
	popq	%rax			/* pop temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_leave:
	/*
	 * We must emulate a "leave", which is the same as a "movq %rbp, %rsp"
	 * followed by a "popq %rbp".  This is quite a bit simpler on amd64
	 * than it is on i386 -- we can exploit the fact that the %rsp is
	 * explicitly saved to effect the pop without having to reshuffle
	 * the other data pushed for the trap.
	 */
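	/*
	 * After INTR_POP and the temp push below, the iretq frame sits at
	 * 8(%rsp): RIP, CS, RFLAGS, RSP, SS -- so 32(%rsp) is the saved
	 * RSP slot that the new stack pointer is written into.
	 */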
	INTR_POP
	pushq	%rax			/* push temp */
	movq	8(%rsp), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	movq	(%rbp), %rax		/* get new %rbp */
	addq	$8, %rbp		/* adjust new %rsp */
	movq	%rbp, 32(%rsp)		/* store new %rsp */
	movq	%rax, %rbp		/* set new %rbp */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_nop:
	/* We must emulate a "nop". */
	INTR_POP
	iretq
	/*NOTREACHED*/

bp_ret:
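	/*
	 * Emulate a "ret": fetch the return address from the interrupted
	 * stack into the saved RIP slot, then pop the interrupted stack
	 * by adjusting the saved RSP up 8 bytes.
	 */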
	INTR_POP
	pushq	%rax			/* push temp */
	movq	32(%rsp), %rax		/* load calling RSP */
	movq	(%rax), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	addq	$8, 32(%rsp)		/* adjust new %rsp */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

	END(dtrace_invop_start)

/*
greg_t dtrace_getfp(void)
*/
	ENTRY(dtrace_getfp)
	movq	%rbp, %rax
	ret
	END(dtrace_getfp)

/*
uint32_t
dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new)
*/
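/*
 * A minimal C sketch of the semantics implemented below with "lock
 * cmpxchg" (dtrace_casptr is the pointer-sized analogue), performed
 * atomically:
 *
 *	uint32_t old = *target;
 *	if (old == cmp)
 *		*target = new;
 *	return (old);
 */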
	ENTRY(dtrace_cas32)
	movl	%esi, %eax
	lock
	cmpxchgl %edx, (%rdi)
	ret
	END(dtrace_cas32)

/*
void *
dtrace_casptr(void *target, void *cmp, void *new)
*/
	ENTRY(dtrace_casptr)
	movq	%rsi, %rax
	lock
	cmpxchgq %rdx, (%rdi)
	ret
	END(dtrace_casptr)

/*
uintptr_t
dtrace_caller(int aframes)
*/
	ENTRY(dtrace_caller)
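	/*
	 * Not implemented here: returning -1 signals the caller to fall
	 * back to a stack walk to determine the probe's caller.
	 */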
	movq	$-1, %rax
	ret
	END(dtrace_caller)

/*
void
dtrace_copy(uintptr_t src, uintptr_t dest, size_t size)
*/
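/*
 * Both variants below are byte-wise copies, roughly this C sketch:
 *
 *	for (size_t i = 0; i < size; i++)
 *		((uint8_t *)dest)[i] = ((uint8_t *)src)[i];
 *
 * The _smap variant brackets the copy with stac/clac so that it is
 * allowed to touch user memory while SMAP is enabled.
 */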
	ENTRY(dtrace_copy_nosmap)
	pushq	%rbp
	movq	%rsp, %rbp

	xchgq	%rdi, %rsi		/* make %rsi source, %rdi dest */
	movq	%rdx, %rcx		/* load count */
	repz				/* repeat for count ... */
	smovb				/*   move from %ds:rsi to %es:rdi */
	leave
	ret
	END(dtrace_copy_nosmap)

	ENTRY(dtrace_copy_smap)
	pushq	%rbp
	movq	%rsp, %rbp

	xchgq	%rdi, %rsi		/* make %rsi source, %rdi dest */
	movq	%rdx, %rcx		/* load count */
	stac
	repz				/* repeat for count ... */
	smovb				/*   move from %ds:rsi to %es:rdi */
	clac
	leave
	ret
	END(dtrace_copy_smap)

/*
void
dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
*/
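/*
 * Roughly, in C (a sketch of the loop below):
 *
 *	do {
 *		char c = *(char *)uaddr++;
 *		*(char *)kaddr++ = c;
 *		size--;
 *		if (c == '\0')
 *			break;
 *		if ((size & 0xfff) == 0 &&
 *		    (*flags & CPU_DTRACE_BADADDR) != 0)
 *			break;
 *	} while (size != 0);
 *
 * i.e. copy until a NUL byte or the count is exhausted, polling the
 * fault flag once every 4K bytes so a faulting copy terminates early.
 */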
	ENTRY(dtrace_copystr_nosmap)
	pushq	%rbp
	movq	%rsp, %rbp

0:
	movb	(%rdi), %al		/* load from source */
	movb	%al, (%rsi)		/* store to destination */
	addq	$1, %rdi		/* increment source pointer */
	addq	$1, %rsi		/* increment destination pointer */
	subq	$1, %rdx		/* decrement remaining count */
	cmpb	$0, %al
	je	2f
	testq	$0xfff, %rdx		/* test if count is 4k-aligned */
	jnz	1f			/* if not, continue with copying */
	testq	$CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */
	jnz	2f
1:
	cmpq	$0, %rdx
	jne	0b
2:
	leave
	ret

	END(dtrace_copystr_nosmap)

	ENTRY(dtrace_copystr_smap)
	pushq	%rbp
	movq	%rsp, %rbp

	stac
0:
	movb	(%rdi), %al		/* load from source */
	movb	%al, (%rsi)		/* store to destination */
	addq	$1, %rdi		/* increment source pointer */
	addq	$1, %rsi		/* increment destination pointer */
	subq	$1, %rdx		/* decrement remaining count */
	cmpb	$0, %al
	je	2f
	testq	$0xfff, %rdx		/* test if count is 4k-aligned */
	jnz	1f			/* if not, continue with copying */
	testq	$CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */
	jnz	2f
1:
	cmpq	$0, %rdx
	jne	0b
2:
	clac
	leave
	ret

	END(dtrace_copystr_smap)

/*
uintptr_t
dtrace_fulword(void *addr)
*/
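/*
 * The fetch routines below each perform a single load of the given
 * width.  The _smap variants wrap the load in stac/clac so that it is
 * permitted while SMAP is enabled; a faulting load is caught by the
 * DTrace trap handler, which records the fault (CPU_DTRACE_BADADDR)
 * for probe context instead of panicking.
 */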
	ENTRY(dtrace_fulword_nosmap)
	movq	(%rdi), %rax
	ret
	END(dtrace_fulword_nosmap)

	ENTRY(dtrace_fulword_smap)
	stac
	movq	(%rdi), %rax
	clac
	ret
	END(dtrace_fulword_smap)

/*
uint8_t
dtrace_fuword8_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword8_nocheck_nosmap)
	xorq	%rax, %rax
	movb	(%rdi), %al
	ret
	END(dtrace_fuword8_nocheck_nosmap)

	ENTRY(dtrace_fuword8_nocheck_smap)
	stac
	xorq	%rax, %rax
	movb	(%rdi), %al
	clac
	ret
	END(dtrace_fuword8_nocheck_smap)

/*
uint16_t
dtrace_fuword16_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword16_nocheck_nosmap)
	xorq	%rax, %rax
	movw	(%rdi), %ax
	ret
	END(dtrace_fuword16_nocheck_nosmap)

	ENTRY(dtrace_fuword16_nocheck_smap)
	stac
	xorq	%rax, %rax
	movw	(%rdi), %ax
	clac
	ret
	END(dtrace_fuword16_nocheck_smap)

/*
uint32_t
dtrace_fuword32_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword32_nocheck_nosmap)
	xorq	%rax, %rax
	movl	(%rdi), %eax
	ret
	END(dtrace_fuword32_nocheck_nosmap)

	ENTRY(dtrace_fuword32_nocheck_smap)
	stac
	xorq	%rax, %rax
	movl	(%rdi), %eax
	clac
	ret
	END(dtrace_fuword32_nocheck_smap)

/*
uint64_t
dtrace_fuword64_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword64_nocheck_nosmap)
	movq	(%rdi), %rax
	ret
	END(dtrace_fuword64_nocheck_nosmap)

	ENTRY(dtrace_fuword64_nocheck_smap)
	stac
	movq	(%rdi), %rax
	clac
	ret
	END(dtrace_fuword64_nocheck_smap)

/*
void
dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
    int fault, int fltoffs, uintptr_t illval)
*/
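/*
 * Rotate the six incoming arguments one register to the right so that
 * dtrace_probeid_error can be passed as the first argument to
 * dtrace_probe(); the original %r9 (illval) is spilled to the stack,
 * where the SysV AMD64 ABI places a seventh integer argument.
 */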
	ENTRY(dtrace_probe_error)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x8, %rsp
	movq	%r9, (%rsp)
	movq	%r8, %r9
	movq	%rcx, %r8
	movq	%rdx, %rcx
	movq	%rsi, %rdx
	movq	%rdi, %rsi
	movl	dtrace_probeid_error(%rip), %edi
	call	dtrace_probe
	addq	$0x8, %rsp
	leave
	ret
	END(dtrace_probe_error)

/*
void
dtrace_membar_producer(void)
*/
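/*
 * amd64's memory model already orders stores against stores and loads
 * against loads, so neither membar needs a fence instruction; the call
 * itself suffices as a compiler barrier.  The "rep; ret" encoding is a
 * branch-target optimization, as the comments below note.
 */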
	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	END(dtrace_membar_producer)

/*
void
dtrace_membar_consumer(void)
*/
	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	END(dtrace_membar_consumer)

/*
dtrace_icookie_t
dtrace_interrupt_disable(void)
*/
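/*
 * The cookie is simply the saved %rflags; popfq in the enable routine
 * restores the interrupt flag to its prior state.  Typical usage, as
 * a sketch:
 *
 *	dtrace_icookie_t cookie = dtrace_interrupt_disable();
 *	...critical section...
 *	dtrace_interrupt_enable(cookie);
 */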
	ENTRY(dtrace_interrupt_disable)
	pushfq
	popq	%rax
	cli
	ret
	END(dtrace_interrupt_disable)

/*
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
*/
	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi
	popfq
	ret
	END(dtrace_interrupt_enable)