xref: /freebsd/sys/cddl/dev/dtrace/amd64/dtrace_asm.S (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Portions Copyright 2008 John Birrell <jb@freebsd.org>
 *
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
28
/*
 * Preprocessor preamble.  _ASM is defined before the headers are pulled in
 * (presumably so they expose only assembler-safe definitions -- confirm
 * against the headers).  TF_* and similar offsets used below are expected to
 * come from assym.inc.
 */
#define _ASM

#include <machine/asmacros.h>
#include <sys/cpuvar_defs.h>
#include <sys/dtrace.h>

#include "assym.inc"
36
/*
 * INTR_POP -- unwind a trap frame so that an iretq can follow.
 *
 * In:	%rsp points at a trap frame laid out according to the TF_* offsets.
 * Out:	%rdi, %rsi, %rdx, %rcx, %r8, %r9, %rax, %rbx, %rbp and %r10-%r15 are
 *	reloaded from the frame; %rsp is advanced by TF_RIP so that it points
 *	at the saved RIP/CS/RFLAGS/RSP/SS words.
 *
 * If the privilege-level bits (SEL_RPL_MASK) of the saved %cs are non-zero,
 * interrupts are disabled and swapgs is executed before %rsp is adjusted
 * (presumably the return-to-user case -- confirm against the trap entry code).
 *
 * Clobbers the flags.  Defines the numeric local label "1", so code that
 * expands this macro must not rely on a "1b"/"1f" reference spanning it.
 */
#define INTR_POP				\
	movq	TF_RDI(%rsp),%rdi;		\
	movq	TF_RSI(%rsp),%rsi;		\
	movq	TF_RDX(%rsp),%rdx;		\
	movq	TF_RCX(%rsp),%rcx;		\
	movq	TF_R8(%rsp),%r8;		\
	movq	TF_R9(%rsp),%r9;		\
	movq	TF_RAX(%rsp),%rax;		\
	movq	TF_RBX(%rsp),%rbx;		\
	movq	TF_RBP(%rsp),%rbp;		\
	movq	TF_R10(%rsp),%r10;		\
	movq	TF_R11(%rsp),%r11;		\
	movq	TF_R12(%rsp),%r12;		\
	movq	TF_R13(%rsp),%r13;		\
	movq	TF_R14(%rsp),%r14;		\
	movq	TF_R15(%rsp),%r15;		\
	testb	$SEL_RPL_MASK,TF_CS(%rsp);	\
	jz	1f;				\
	cli;					\
	swapgs;					\
1:	addq	$TF_RIP,%rsp;
58
/*
 * dtrace_invop_start -- breakpoint (#BP) entry used for instructions that
 * DTrace has replaced with int3.
 *
 * In:	%rsp points at a trap frame (TF_* offsets).
 *
 * Calls dtrace_invop(addr-of-int3, trap frame, scratch) and dispatches on
 * its return value:
 *	DTRACE_INVOP_PUSHL_EBP	emulate "pushq %rbp"
 *	DTRACE_INVOP_CALL	emulate "call"
 *	DTRACE_INVOP_LEAVE	emulate "leave"
 *	DTRACE_INVOP_NOP	emulate "nop"
 *	DTRACE_INVOP_RET	emulate "ret"
 * Any other value jumps through dtrace_invop_calltrap_addr.
 *
 * Every emulation path restores the registers with INTR_POP, edits the
 * RIP/CS/RFLAGS/RSP/SS words that iretq consumes, and leaves via iretq.
 */
	ENTRY(dtrace_invop_start)

	KMSAN_ENTER

	/*
	 * #BP traps with %rip set to the next address. We need to decrement
	 * the value to indicate the address of the int3 (0xcc) instruction
	 * that we substituted.
	 */
	movq	TF_RIP(%rsp), %rdi	/* arg 0: address following the int3 */
	decq	%rdi			/* ... moved back onto the int3 */
	movq	%rsp, %rsi		/* arg 1: the trap frame */

	/*
	 * Allocate some scratch space to let the invop handler return a value.
	 * This is needed when emulating "call" instructions.
	 */
	subq	$16, %rsp
	movq	%rsp, %rdx		/* arg 2: the scratch space */

	call	dtrace_invop
	addq	$16, %rsp		/* release scratch; contents stay in memory */

#ifdef KMSAN
	/* Carry the dtrace_invop() return value across KMSAN_LEAVE. */
	movq	%rax, %r12
	KMSAN_LEAVE
	movq	%r12, %rax
#endif

	cmpl	$DTRACE_INVOP_PUSHL_EBP, %eax
	je	bp_push
	cmpl	$DTRACE_INVOP_CALL, %eax
	je	bp_call
	cmpl	$DTRACE_INVOP_LEAVE, %eax
	je	bp_leave
	cmpl	$DTRACE_INVOP_NOP, %eax
	je	bp_nop
	cmpl	$DTRACE_INVOP_RET, %eax
	je	bp_ret

	/* When all else fails handle the trap in the usual way. */
	jmpq	*dtrace_invop_calltrap_addr

bp_push:
	/*
	 * We must emulate a "pushq %rbp".  The interrupted %rsp is lowered by
	 * 8 bytes and the base pointer is stored at the new top of that stack.
	 *
	 * The five iretq words are first copied 16 bytes lower.
	 * NOTE(review): presumably this keeps the store of %rbp from landing
	 * on the frame itself when the trap did not switch stacks -- confirm.
	 */
	INTR_POP
	subq	$16, %rsp		/* make room for %rbp */
	pushq	%rax			/* push temp */
	movq	24(%rsp), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	movq	32(%rsp), %rax		/* load calling CS */
	movq	%rax, 16(%rsp)		/* store calling CS */
	movq	40(%rsp), %rax		/* load calling RFLAGS */
	movq	%rax, 24(%rsp)		/* store calling RFLAGS */
	movq	48(%rsp), %rax		/* load calling RSP */
	subq	$8, %rax		/* make room for %rbp */
	movq	%rax, 32(%rsp)		/* store calling RSP */
	movq	56(%rsp), %rax		/* load calling SS */
	movq	%rax, 40(%rsp)		/* store calling SS */
	movq	32(%rsp), %rax		/* reload calling RSP */
	movq	%rbp, (%rax)		/* store %rbp there */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_call:
	/*
	 * Emulate a "call" instruction.  The invop handler must have already
	 * updated the saved copy of %rip in the register set.  It's our job to
	 * pull the hardware-saved registers down to make space for the return
	 * address, which is provided by the invop handler in our scratch
	 * space.
	 */
	INTR_POP
	subq	$16, %rsp		/* make room for the return address */
	pushq	%rax			/* push temp */
	pushq	%rbx			/* push temp */

	movq	32(%rsp), %rax		/* load calling RIP */
	movq	%rax, 16(%rsp)		/* store calling RIP */
	movq	40(%rsp), %rax		/* load calling CS */
	movq	%rax, 24(%rsp)		/* store calling CS */
	movq	48(%rsp), %rax		/* load calling RFLAGS */
	movq	%rax, 32(%rsp)		/* store calling RFLAGS */
	movq	56(%rsp), %rax		/* load calling RSP */
	subq	$8, %rax		/* make room for return address */
	movq	%rax, 40(%rsp)		/* store calling RSP */
	movq	64(%rsp), %rax		/* load calling SS */
	movq	%rax, 48(%rsp)		/* store calling SS */

	/*
	 * %rsp is now (trap frame + TF_RIP - 32) and the scratch space was at
	 * (trap frame - 16), i.e. -(TF_RIP - 16) relative to %rsp.
	 */
	movq	-(TF_RIP - 16)(%rsp), %rax /* load return address */
	movq	40(%rsp), %rbx		/* reload calling RSP */
	movq	%rax, (%rbx)		/* store return address */

	popq	%rbx			/* pop temp */
	popq	%rax			/* pop temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_leave:
	/*
	 * We must emulate a "leave", which is the same as a "movq %rbp, %rsp"
	 * followed by a "popq %rbp".  This is quite a bit simpler on amd64
	 * than it is on i386 -- we can exploit the fact that the %rsp is
	 * explicitly saved to effect the pop without having to reshuffle
	 * the other data pushed for the trap.  The saved RIP is left alone.
	 */
	INTR_POP
	pushq	%rax			/* push temp */
	movq	(%rbp), %rax		/* get new %rbp */
	addq	$8, %rbp		/* adjust new %rsp */
	movq	%rbp, 32(%rsp)		/* store new %rsp */
	movq	%rax, %rbp		/* set new %rbp */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_nop:
	/* We must emulate a "nop". */
	INTR_POP
	iretq
	/*NOTREACHED*/

bp_ret:
	/*
	 * Emulate a "ret": the word at the interrupted %rsp becomes the new
	 * %rip and the interrupted %rsp moves up by 8 bytes.
	 */
	INTR_POP
	pushq	%rax			/* push temp */
	movq	32(%rsp), %rax		/* load %rsp */
	movq	(%rax), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	addq	$8, 32(%rsp)		/* adjust new %rsp */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

	END(dtrace_invop_start)
199
/*
 * greg_t dtrace_getfp(void)
 *
 * Out:	%rax = the caller's frame pointer (%rbp is returned unmodified).
 */
	ENTRY(dtrace_getfp)
	movq	%rbp, %rax		/* return value = current %rbp */
	ret
	END(dtrace_getfp)
207
/*
 * uint32_t dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new)
 *
 * Atomic 32-bit compare-and-swap.
 * In:	%rdi = target, %esi = cmp, %edx = new
 * Out:	%eax = value found in *target (equal to cmp when the swap happened)
 */
	ENTRY(dtrace_cas32)
	movl	%esi, %eax		/* cmpxchg compares against %eax */
	lock cmpxchgl %edx, (%rdi)	/* if (*target == cmp) *target = new */
	ret
	END(dtrace_cas32)
218
/*
 * void *dtrace_casptr(void *target, void *cmp, void *new)
 *
 * Atomic pointer-sized (64-bit) compare-and-swap.
 * In:	%rdi = target, %rsi = cmp, %rdx = new
 * Out:	%rax = value found in *target (equal to cmp when the swap happened)
 */
	ENTRY(dtrace_casptr)
	movq	%rsi, %rax		/* cmpxchg compares against %rax */
	lock cmpxchgq %rdx, (%rdi)	/* if (*target == cmp) *target = new */
	ret
	END(dtrace_casptr)
229
/*
 * uintptr_t dtrace_caller(int aframes)
 *
 * This implementation ignores aframes and always returns -1.
 */
	ENTRY(dtrace_caller)
	movq	$-1, %rax		/* constant result */
	ret
	END(dtrace_caller)
238
/*
 * void dtrace_copy(uintptr_t src, uintptr_t dest, size_t size)
 *
 * Variant that does not touch the AC flag.
 * In:	%rdi = src, %rsi = dest, %rdx = size in bytes
 * Copies size bytes, one byte per iteration, with a repeated string move.
 * The copy direction follows the direction flag (assumed clear -- confirm).
 */
	ENTRY(dtrace_copy_nosmap)
	pushq	%rbp
	movq	%rsp, %rbp

	xchgq	%rdi, %rsi		/* string moves want %rsi = src, %rdi = dest */
	movq	%rdx, %rcx		/* %rcx = byte count for rep */
	rep movsb			/* copy %rcx bytes from %ds:rsi to %es:rdi */
	leave
	ret
	END(dtrace_copy_nosmap)
254
/*
 * dtrace_copy variant that brackets the copy with stac/clac.
 *
 * In:	%rdi = src, %rsi = dest, %rdx = size in bytes
 * Copies size bytes, one byte per iteration, with a repeated string move.
 * The copy direction follows the direction flag (assumed clear -- confirm).
 */
	ENTRY(dtrace_copy_smap)
	pushq	%rbp
	movq	%rsp, %rbp

	xchgq	%rdi, %rsi		/* string moves want %rsi = src, %rdi = dest */
	movq	%rdx, %rcx		/* %rcx = byte count for rep */
	stac				/* set AC for the duration of the copy */
	rep movsb			/* copy %rcx bytes from %ds:rsi to %es:rdi */
	clac				/* clear AC again */
	leave
	ret
	END(dtrace_copy_smap)
268
/*
 * void dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
 *     volatile uint16_t *flags)
 *
 * Variant that does not touch the AC flag.
 * In:	%rdi = uaddr (source), %rsi = kaddr (destination),
 *	%rdx = size of the destination in bytes, %rcx = flags
 *
 * Copies bytes until a NUL has been copied, size bytes have been copied, or
 * CPU_DTRACE_BADADDR is seen in *flags.  The flag word is only examined when
 * the low 12 bits of the remaining count are zero.  A size of zero copies
 * nothing.  *flags is read as the 16-bit quantity it is declared to be.
 */
	ENTRY(dtrace_copystr_nosmap)
	pushq	%rbp
	movq	%rsp, %rbp

	testq	%rdx, %rdx		/* zero-sized buffer: nothing to copy */
	jz	2f
0:
	movb	(%rdi), %al		/* load from source */
	movb	%al, (%rsi)		/* store to destination */
	addq	$1, %rdi		/* increment source pointer */
	addq	$1, %rsi		/* increment destination pointer */
	subq	$1, %rdx		/* decrement remaining count */
	cmpb	$0, %al
	je	2f			/* the terminating NUL was copied */
	testq	$0xfff, %rdx		/* test if count is 4k-aligned */
	jnz	1f			/* if not, continue with copying */
	testw	$CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */
	jnz	2f
1:
	cmpq	$0, %rdx
	jne	0b			/* room left: copy the next byte */
2:
	leave
	ret

	END(dtrace_copystr_nosmap)
298
/*
 * dtrace_copystr variant that brackets the copy loop with stac/clac.
 *
 * In:	%rdi = source, %rsi = destination,
 *	%rdx = size of the destination in bytes,
 *	%rcx = pointer to the 16-bit dtrace flag word
 *
 * Copies bytes until a NUL has been copied, size bytes have been copied, or
 * CPU_DTRACE_BADADDR is seen in the flag word.  The flag word is only
 * examined when the low 12 bits of the remaining count are zero.  A size of
 * zero copies nothing and leaves the AC flag untouched.
 */
	ENTRY(dtrace_copystr_smap)
	pushq	%rbp
	movq	%rsp, %rbp

	testq	%rdx, %rdx		/* zero-sized buffer: nothing to copy */
	jz	3f

	stac				/* set AC for the duration of the loop */
0:
	movb	(%rdi), %al		/* load from source */
	movb	%al, (%rsi)		/* store to destination */
	addq	$1, %rdi		/* increment source pointer */
	addq	$1, %rsi		/* increment destination pointer */
	subq	$1, %rdx		/* decrement remaining count */
	cmpb	$0, %al
	je	2f			/* the terminating NUL was copied */
	testq	$0xfff, %rdx		/* test if count is 4k-aligned */
	jnz	1f			/* if not, continue with copying */
	testw	$CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */
	jnz	2f
1:
	cmpq	$0, %rdx
	jne	0b			/* room left: copy the next byte */
2:
	clac				/* clear AC again */
3:
	leave
	ret

	END(dtrace_copystr_smap)
325
/*
 * uintptr_t dtrace_fulword(void *addr)
 *
 * Variant that does not touch the AC flag.
 * In:	%rdi = addr
 * Out:	%rax = the 64-bit word read from addr
 */
	ENTRY(dtrace_fulword_nosmap)
	movq	(%rdi), %rax		/* fetch the word */
	ret
	END(dtrace_fulword_nosmap)
334
/*
 * dtrace_fulword variant that brackets the load with stac/clac.
 *
 * In:	%rdi = address to read
 * Out:	%rax = the 64-bit word read from that address
 */
	ENTRY(dtrace_fulword_smap)
	stac				/* set AC around the load */
	movq	(%rdi), %rax		/* fetch the word */
	clac				/* clear AC again */
	ret
	END(dtrace_fulword_smap)
341
/*
 * uint8_t dtrace_fuword8_nocheck(void *addr)
 *
 * Variant that does not touch the AC flag.
 * In:	%rdi = addr
 * Out:	%rax = the byte at addr, zero-extended
 *
 * %rax is cleared by a separate instruction ahead of the load.
 * NOTE(review): presumably so the result is zero if the load itself is
 * skipped by a fault handler -- confirm before folding into a movzbl.
 */
	ENTRY(dtrace_fuword8_nocheck_nosmap)
	xorl	%eax, %eax		/* %rax = 0 (32-bit write clears the top half) */
	movb	(%rdi), %al		/* low byte = *addr */
	ret
	END(dtrace_fuword8_nocheck_nosmap)
351
/*
 * dtrace_fuword8_nocheck variant that brackets the load with stac/clac.
 *
 * In:	%rdi = address to read
 * Out:	%rax = the byte at that address, zero-extended
 *
 * %rax is cleared by a separate instruction ahead of the load.
 * NOTE(review): presumably so the result is zero if the load itself is
 * skipped by a fault handler -- confirm before folding into a movzbl.
 */
	ENTRY(dtrace_fuword8_nocheck_smap)
	xorl	%eax, %eax		/* %rax = 0 (32-bit write clears the top half) */
	stac				/* set AC around the load */
	movb	(%rdi), %al		/* low byte = *addr */
	clac				/* clear AC again */
	ret
	END(dtrace_fuword8_nocheck_smap)
359
/*
 * uint16_t dtrace_fuword16_nocheck(void *addr)
 *
 * Variant that does not touch the AC flag.
 * In:	%rdi = addr
 * Out:	%rax = the 16-bit value at addr, zero-extended
 *
 * %rax is cleared by a separate instruction ahead of the load.
 * NOTE(review): presumably so the result is zero if the load itself is
 * skipped by a fault handler -- confirm before folding into a movzwl.
 */
	ENTRY(dtrace_fuword16_nocheck_nosmap)
	xorl	%eax, %eax		/* %rax = 0 (32-bit write clears the top half) */
	movw	(%rdi), %ax		/* low word = *addr */
	ret
	END(dtrace_fuword16_nocheck_nosmap)
369
/*
 * dtrace_fuword16_nocheck variant that brackets the load with stac/clac.
 *
 * In:	%rdi = address to read
 * Out:	%rax = the 16-bit value at that address, zero-extended
 *
 * %rax is cleared by a separate instruction ahead of the load.
 * NOTE(review): presumably so the result is zero if the load itself is
 * skipped by a fault handler -- confirm before folding into a movzwl.
 */
	ENTRY(dtrace_fuword16_nocheck_smap)
	xorl	%eax, %eax		/* %rax = 0 (32-bit write clears the top half) */
	stac				/* set AC around the load */
	movw	(%rdi), %ax		/* low word = *addr */
	clac				/* clear AC again */
	ret
	END(dtrace_fuword16_nocheck_smap)
377
/*
 * uint32_t dtrace_fuword32_nocheck(void *addr)
 *
 * Variant that does not touch the AC flag.
 * In:	%rdi = addr
 * Out:	%rax = the 32-bit value at addr, zero-extended
 *
 * The explicit clearing of %rax is kept although movl zero-extends.
 * NOTE(review): presumably so the result is zero if the load itself is
 * skipped by a fault handler -- confirm before removing it.
 */
	ENTRY(dtrace_fuword32_nocheck_nosmap)
	xorl	%eax, %eax		/* %rax = 0 */
	movl	(%rdi), %eax		/* %eax = *addr */
	ret
	END(dtrace_fuword32_nocheck_nosmap)
387
/*
 * dtrace_fuword32_nocheck variant that brackets the load with stac/clac.
 *
 * In:	%rdi = address to read
 * Out:	%rax = the 32-bit value at that address, zero-extended
 *
 * The explicit clearing of %rax is kept although movl zero-extends.
 * NOTE(review): presumably so the result is zero if the load itself is
 * skipped by a fault handler -- confirm before removing it.
 */
	ENTRY(dtrace_fuword32_nocheck_smap)
	xorl	%eax, %eax		/* %rax = 0 */
	stac				/* set AC around the load */
	movl	(%rdi), %eax		/* %eax = *addr */
	clac				/* clear AC again */
	ret
	END(dtrace_fuword32_nocheck_smap)
395
/*
 * uint64_t dtrace_fuword64_nocheck(void *addr)
 *
 * Variant that does not touch the AC flag.
 * In:	%rdi = addr
 * Out:	%rax = the 64-bit value at addr
 */
	ENTRY(dtrace_fuword64_nocheck_nosmap)
	movq	(%rdi), %rax		/* %rax = *addr */
	ret
	END(dtrace_fuword64_nocheck_nosmap)
404
/*
 * dtrace_fuword64_nocheck variant that brackets the load with stac/clac.
 *
 * In:	%rdi = address to read
 * Out:	%rax = the 64-bit value at that address
 */
	ENTRY(dtrace_fuword64_nocheck_smap)
	stac				/* set AC around the load */
	movq	(%rdi), %rax		/* %rax = *addr */
	clac				/* clear AC again */
	ret
	END(dtrace_fuword64_nocheck_smap)
411
/*
 * void dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid,
 *     int which, int fault, int fltoffs, uintptr_t illval)
 *
 * Calls dtrace_probe() with dtrace_probeid_error inserted as the first
 * argument.  The six incoming register arguments each move up one slot;
 * the last one (illval) no longer fits in a register and is passed as the
 * first stack argument.
 *
 * In:	%rdi, %rsi, %rdx, %rcx, %r8, %r9 = the six arguments above
 *
 * 16 bytes are reserved (8 for illval, 8 of padding) so that %rsp is
 * 16-byte aligned at the call, as the System V AMD64 ABI requires; illval
 * still sits at (%rsp) at the call.  "leave" releases the area.
 */
	ENTRY(dtrace_probe_error)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$16, %rsp		/* stack-argument slot + alignment padding */
	movq	%r9, (%rsp)		/* illval -> first stack argument */
	movq	%r8, %r9		/* fltoffs */
	movq	%rcx, %r8		/* fault */
	movq	%rdx, %rcx		/* which */
	movq	%rsi, %rdx		/* epid */
	movq	%rdi, %rsi		/* state */
	movl	dtrace_probeid_error(%rip), %edi /* probe id */
	call	dtrace_probe
	leave				/* restores %rsp and %rbp */
	ret
	END(dtrace_probe_error)
433
/*
 * void dtrace_membar_producer(void)
 *
 * The body is only a return; no fence instruction is executed.
 */
	ENTRY(dtrace_membar_producer)
	/*
	 * Two-byte return ("rep; ret") for a return that is a branch target,
	 * per the AMD Software Optimization Guide, section 6.2.
	 */
	rep;	ret
	END(dtrace_membar_producer)
442
/*
 * void dtrace_membar_consumer(void)
 *
 * The body is only a return; no fence instruction is executed.
 */
	ENTRY(dtrace_membar_consumer)
	/*
	 * Two-byte return ("rep; ret") for a return that is a branch target,
	 * per the AMD Software Optimization Guide, section 6.2.
	 */
	rep;	ret
	END(dtrace_membar_consumer)
451
/*
 * dtrace_icookie_t dtrace_interrupt_disable(void)
 *
 * Out:	%rax = RFLAGS as it was on entry (the cookie)
 * Interrupts are disabled on return.
 */
	ENTRY(dtrace_interrupt_disable)
	pushfq				/* snapshot RFLAGS ... */
	popq	%rax			/* ... into the return value */
	cli				/* then mask interrupts */
	ret
	END(dtrace_interrupt_disable)
462
/*
 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
 *
 * In:	%rdi = cookie
 * Loads RFLAGS from the cookie, so the interrupt flag (and every other flag
 * popfq is able to write) takes the value recorded in it.
 */
	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi			/* cookie -> stack ... */
	popfq				/* ... -> RFLAGS */
	ret
	END(dtrace_interrupt_enable)
472