/*-
 * Copyright (c) 2018-2019 The FreeBSD Foundation
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_ddb.h"

#include <machine/asmacros.h>
#include <machine/specialreg.h>
#include <machine/pmap.h>

#include "assym.inc"

	.text

/* Address: %rdi */
ENTRY(pagezero_std)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE/8,%ecx
	xorl	%eax,%eax
	rep
	stosq
	POP_FRAME_POINTER
	ret
END(pagezero_std)

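/*
 * Same as pagezero_std, but as a single byte-granular rep stosb; used
 * on CPUs advertising ERMS (Enhanced REP MOVSB/STOSB).
 */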
ENTRY(pagezero_erms)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE,%ecx
	xorl	%eax,%eax
	rep
	stosb
	POP_FRAME_POINTER
	ret
END(pagezero_erms)

/*
 * pagecopy(%rdi=from, %rsi=to)
 */
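/*
 * Note: movsq copies from (%rsi) to (%rdi) while pagecopy takes
 * (from, to) in that order, hence the register swap below.
 */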
ENTRY(pagecopy)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE/8,%ecx
	movq	%rdi,%r9
	movq	%rsi,%rdi
	movq	%r9,%rsi
	rep
	movsq
	POP_FRAME_POINTER
	ret
END(pagecopy)

/*
 * memcmp(b1, b2, len)
 *	   rdi,rsi,rdx
 */
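/*
 * Returns the difference between the first pair of mismatching bytes
 * (libc memcmp semantics), or 0 if the buffers are equal.  Short sizes
 * are compared with forward/backward overlapping loads; buffers larger
 * than 32 bytes go through a 32-bytes-per-iteration loop.
 */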
ENTRY(memcmp)
	PUSH_FRAME_POINTER

	xorl	%eax,%eax
10:
	cmpq	$16,%rdx
	ja	101632f

	cmpb	$8,%dl
	jg	100816f

	cmpb	$4,%dl
	jg	100408f

	cmpb	$2,%dl
	jge	100204f

	cmpb	$1,%dl
	jl	100000f
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	subl	%r8d,%eax
100000:
	POP_FRAME_POINTER
	ret

	ALIGN_TEXT
100816:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10081608f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100408:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	80f
	movl	-4(%rdi,%rdx),%r8d
	movl	-4(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	10040804f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100204:
	movzwl	(%rdi),%r8d
	movzwl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movzwl	-2(%rdi,%rdx),%r8d
	movzwl	-2(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
101632:
	cmpq	$32,%rdx
	ja	103200f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	10163208f
	movq	-16(%rdi,%rdx),%r8
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163216f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163224f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
103200:
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	orq	%r8,%r9
	jnz	10320000f

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	orq	%r8,%r9
	jnz	10320016f

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	103200b
	cmpb	$0,%dl
	jne	10b
	POP_FRAME_POINTER
	ret

/*
 * A mismatch was found.
 *
 * Before computing the result we narrow down the range (16 -> 8 -> 4 bytes).
 */
	ALIGN_TEXT
10320016:
	leaq	16(%rdi),%rdi
	leaq	16(%rsi),%rsi
10320000:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10081608:
10163224:
	leaq	-8(%rdi,%rdx),%rdi
	leaq	-8(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163216:
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163208:
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10040804:
	leaq	-4(%rdi,%rdx),%rdi
	leaq	-4(%rsi,%rdx),%rsi
	jmp	1f

	ALIGN_TEXT
80:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	leaq	4(%rdi),%rdi
	leaq	4(%rsi),%rsi

/*
 * We have up to 4 bytes to inspect.
 */
1:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
2:
	subl	%r8d,%eax
	POP_FRAME_POINTER
	ret
END(memcmp)

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 */
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

	ALIGN_TEXT
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

	ALIGN_TEXT
2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
2256:
	std
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
	\end
	ret
.endif
.endm

.macro MEMMOVE_BEGIN
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
	POP_FRAME_POINTER
.endm

ENTRY(memmove_std)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_std)

ENTRY(memmove_erms)
	MEMMOVE erms=1 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_erms)

/*
 * memcpy(dst, src, len)
 *        rdi, rsi, rdx
 *
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy_std)
	MEMMOVE erms=0 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_std)

ENTRY(memcpy_erms)
	MEMMOVE erms=1 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_erms)

/*
 * memset(dst, c,   len)
 *        rdi, rsi, rdx
 */
.macro MEMSET erms
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
	movzbq	%sil,%r8
	movabs	$0x0101010101010101,%r10
	imulq	%r8,%r10
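	/*
	 * %r10 now has the fill byte replicated into all eight byte
	 * lanes, e.g. c == 0xab yields 0xabababababababab.
	 */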

	cmpq	$32,%rcx
	jbe	101632f

	cmpq	$256,%rcx
	ja	1256f

	ALIGN_TEXT
103200:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r10,24(%rdi)
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	ja	103200b
	cmpb	$16,%cl
	ja	201632f
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
201632:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	%r10,(%rdi)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	%r10d,(%rdi)
	movl	%r10d,-4(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movw	%r10w,(%rdi)
	movw	%r10w,-2(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100001:
	cmpb	$0,%cl
	je	100000f
	movb	%r10b,(%rdi)
100000:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
1256:
	movq	%rdi,%r9
	movq	%r10,%rax
	testl	$15,%edi
	jnz	3f
1:
.if \erms == 1
	rep
	stosb
	movq	%r9,%rax
.else
	movq	%rcx,%rdx
	shrq	$3,%rcx
	rep
	stosq
	movq	%r9,%rax
	andl	$7,%edx
	jnz	2f
	POP_FRAME_POINTER
	ret
2:
	movq	%r10,-8(%rdi,%rdx)
.endif
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
3:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%rdi,%r8
	andq	$15,%r8
	leaq	-16(%rcx,%r8),%rcx
	neg	%r8
	leaq	16(%rdi,%r8),%rdi
	jmp	1b
.endm

ENTRY(memset_std)
	MEMSET erms=0
END(memset_std)

ENTRY(memset_erms)
	MEMSET erms=1
END(memset_erms)

/* fillw(pat, base, cnt) */
/*       %rdi,%rsi, %rdx */
ENTRY(fillw)
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rsi,%rdi
	movq	%rdx,%rcx
	rep
	stosw
	POP_FRAME_POINTER
	ret
END(fillw)

/*
 * strlen(string)
 *	  %rdi
 *
 * Uses the ((x - 0x01....01) & ~x & 0x80....80) trick.
 *
 * 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added
 * with leaq.
 *
 * For a description see either:
 * - "Hacker's Delight" by Henry S. Warren, Jr.
 * - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms"
 *   by Agner Fog
 *
 * The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
 */
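/*
 * For reference, a rough C sketch of the word-at-a-time zero-byte scan
 * (a sketch only: it assumes an aligned pointer, uses ffsll() where the
 * code below uses bsfq, and omits the misaligned-head handling):
 *
 *	size_t
 *	strlen_sketch(const char *s)
 *	{
 *		const uint64_t *p = (const uint64_t *)s;
 *		uint64_t x, zbits;
 *
 *		for (;; p++) {
 *			x = *p;
 *			zbits = (x - 0x0101010101010101) & ~x &
 *			    0x8080808080808080;
 *			if (zbits != 0)		// lowest set bit marks the
 *						// first zero byte
 *				return ((const char *)p - s +
 *				    (ffsll(zbits) - 1) / 8);
 *		}
 *	}
 */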
ENTRY(strlen)
	PUSH_FRAME_POINTER
	movabsq	$0xfefefefefefefeff,%r8
	movabsq	$0x8080808080808080,%r9

	movq	%rdi,%r10
	movq	%rdi,%rcx
	testb	$7,%dil
	jz	2f

	/*
	 * Handle misaligned reads: align to 8 and fill
	 * the spurious bytes.
	 */
	andq	$~7,%rdi
	movq	(%rdi),%r11
	shlq	$3,%rcx
	movq	$-1,%rdx
	shlq	%cl,%rdx
	notq	%rdx
	orq	%rdx,%r11

	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jnz	3f

	/*
	 * Main loop.
	 */
	ALIGN_TEXT
1:
	leaq	8(%rdi),%rdi
2:
	movq	(%rdi),%r11
	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jz	1b
3:
	bsfq	%rcx,%rcx
	shrq	$3,%rcx
	leaq	(%rcx,%rdi),%rax
	subq	%r10,%rax
	POP_FRAME_POINTER
	ret
END(strlen)

/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel.  These routines should be
 * the only places that do this.
 *
 * These routines set curpcb->pcb_onfault for the duration of their
 * execution.  When a protection violation occurs inside them, the trap
 * handler returns control to *curpcb->pcb_onfault instead of back to
 * the faulting instruction.
 */

.macro SMAP_DISABLE smap
.if	\smap
	stac
.endif
.endm


.macro SMAP_ENABLE smap
.if	\smap
	clac
.endif
.endm

.macro COPYINOUT_BEGIN
.endm

.macro COPYINOUT_END
	movq	%rax,PCB_ONFAULT(%r11)
	POP_FRAME_POINTER
.endm

.macro COPYINOUT_SMAP_END
	SMAP_ENABLE smap=1
	COPYINOUT_END
.endm

/*
 * copyout(from_kernel, to_user, len)
 *         %rdi,        %rsi,    %rdx
 */
.macro	COPYOUT smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * Check explicitly for non-user addresses.
	 * First, prevent address wrapping.
	 */
	movq	%rsi,%rax
	addq	%rdx,%rax
	jc	copy_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault

	/*
	 * Set return value to zero. Remaining failure mode goes through
	 * copy_fault.
	 */
	xorl	%eax,%eax

	/*
	 * Set up arguments for MEMMOVE.
	 */
	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx

	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm

ENTRY(copyout_nosmap_std)
	COPYOUT smap=0 erms=0
END(copyout_nosmap_std)

ENTRY(copyout_smap_std)
	COPYOUT smap=1 erms=0
END(copyout_smap_std)

ENTRY(copyout_nosmap_erms)
	COPYOUT smap=0 erms=1
END(copyout_nosmap_erms)

ENTRY(copyout_smap_erms)
	COPYOUT smap=1 erms=1
END(copyout_smap_erms)

/*
 * copyin(from_user, to_kernel, len)
 *        %rdi,      %rsi,      %rdx
 */
.macro	COPYIN smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * make sure address is valid
	 */
	movq	%rdi,%rax
	addq	%rdx,%rax
	jc	copy_fault
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault

	xorl	%eax,%eax

	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx

	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm

ENTRY(copyin_nosmap_std)
	COPYIN smap=0 erms=0
END(copyin_nosmap_std)

ENTRY(copyin_smap_std)
	COPYIN smap=1 erms=0
END(copyin_smap_std)

ENTRY(copyin_nosmap_erms)
	COPYIN smap=0 erms=1
END(copyin_nosmap_erms)

ENTRY(copyin_smap_erms)
	COPYIN smap=1 erms=1
END(copyin_smap_erms)

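/*
 * Fault landing pad for copyin/copyout: undo a possible stac, clear
 * pcb_onfault (%r11 still points at the curpcb) and return EFAULT.
 */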
	ALIGN_TEXT
copy_fault:
	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
	je	1f
	clac
1:	movq	$0,PCB_ONFAULT(%r11)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret

/*
 * casueword32.  Compare and set user integer.  Returns -1 on fault,
 *        0 if access was successful, and 1 when comparison failed.
 *        Old value is written to *oldp.
 *        dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
 */
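/*
 * Typical caller pattern, as a sketch only (the real consumers live in
 * the umtx code and similar paths):
 *
 *	rv = casueword32(uaddr, old, &old, new);
 *	if (rv == -1)
 *		return (EFAULT);	// user page unmapped or protected
 *	if (rv == 1)
 *		goto retry;		// *uaddr != old; 'old' was updated
 *	// rv == 0: the CAS succeeded
 */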
ENTRY(casueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
	lock cmpxchgl %ecx,(%rdi)		/* new = %ecx */
	setne	%cl

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword32_nosmap)

ENTRY(casueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
	stac
	lock cmpxchgl %ecx,(%rdi)		/* new = %ecx */
	clac
	setne	%cl

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword32_smap)

/*
 * casueword.  Compare and set user long.  Returns -1 on fault,
 *        0 if access was successful, and 1 when comparison failed.
 *        Old value is written to *oldp.
 *        dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
 */
ENTRY(casueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
	lock cmpxchgq %rcx,(%rdi)		/* new = %rcx */
	setne	%cl

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword_nosmap)

ENTRY(casueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
	stac
	lock cmpxchgq %rcx,(%rdi)		/* new = %rcx */
	clac
	setne	%cl

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword_smap)

/*
 * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
 * byte from user memory.
 * addr = %rdi, valp = %rsi
 */

ENTRY(fueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movq	(%rdi),%r11
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_nosmap)

ENTRY(fueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movq	(%rdi),%r11
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_smap)

ENTRY(fueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movl	(%rdi),%r11d
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_nosmap)

ENTRY(fueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movl	(%rdi),%r11d
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_smap)

ENTRY(fuword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzwl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_nosmap)

ENTRY(fuword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzwl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_smap)

ENTRY(fubyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzbl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_nosmap)

ENTRY(fubyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzbl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_smap)

/*
 * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to
 * user memory.
 * addr = %rdi, value = %rsi
 */
ENTRY(suword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movq	%rsi,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_nosmap)

ENTRY(suword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movq	%rsi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_smap)

ENTRY(suword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_nosmap)

ENTRY(suword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movl	%esi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_smap)

ENTRY(suword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movw	%si,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_nosmap)

ENTRY(suword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movw	%si,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_smap)

ENTRY(subyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	movb	%al,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_nosmap)

ENTRY(subyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	stac
	movb	%al,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_smap)

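/*
 * Shared fault landing pad for the fetch/store/cas families above:
 * undo a possible stac, clear pcb_onfault and return -1.
 */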
	ALIGN_TEXT
fusufault:
	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
	je	1f
	clac
1:	movq	PCPU(CURPCB),%rcx
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	decq	%rax
	POP_FRAME_POINTER
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *           %rdi, %rsi, %rdx, %rcx
 *
 *	Copy a string from 'from' to 'to', stopping when a NUL character
 *	is reached.  Return ENAMETOOLONG if the string is longer than
 *	maxlen, and EFAULT on protection violations.  If lencopied is
 *	non-NULL, return the actual length in *lencopied.
 */
.macro COPYINSTR smap
	PUSH_FRAME_POINTER
	movq	%rdx,%r8			/* %r8 = maxlen */
	movq	PCPU(CURPCB),%r9
	movq	$cpystrflt,PCB_ONFAULT(%r9)

	movq	$VM_MAXUSER_ADDRESS,%rax

	/* make sure 'from' is within bounds */
	subq	%rdi,%rax
	jbe	cpystrflt

	SMAP_DISABLE \smap

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpq	%rdx,%rax
	jb	8f
1:
	incq	%rdx
2:
	decq	%rdx
.if \smap == 0
	jz	copyinstr_toolong
.else
	jz	copyinstr_toolong_smap
.endif

	movb	(%rdi),%al
	movb	%al,(%rsi)
	incq	%rsi
	incq	%rdi
	testb	%al,%al
	jnz	2b

	SMAP_ENABLE \smap

	/* Success -- 0 byte reached */
	decq	%rdx
	xorl	%eax,%eax

	/* set *lencopied and return %eax */
	movq	%rax,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	3f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
3:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
8:
	movq	%rax,%rdx
	movq	%rax,%r8
	jmp 1b

.endm

ENTRY(copyinstr_nosmap)
	COPYINSTR smap=0
END(copyinstr_nosmap)

ENTRY(copyinstr_smap)
	COPYINSTR smap=1
END(copyinstr_smap)

cpystrflt:
	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
	je	1f
	clac
1:	movl	$EFAULT,%eax
cpystrflt_x:
	/* set *lencopied and return %eax */
	movq	$0,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	1f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
1:
	POP_FRAME_POINTER
	ret

copyinstr_toolong_smap:
	clac
copyinstr_toolong:
	/* rdx is zero - return ENAMETOOLONG or EFAULT */
	movq	$VM_MAXUSER_ADDRESS,%rax
	cmpq	%rax,%rdi
	jae	cpystrflt
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

/*
 * Handling of special amd64 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	lgdt	(%rdi)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%fs	/* Beware, use wrmsr to set 64 bit base */
	movl	%eax,%gs
	movl	%eax,%ss

	/* reload code selector by turning return into intersegmental return */
	popq	%rax
	pushq	$KCSEL
	pushq	%rax
	lretq
END(lgdt)

/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

ENTRY(setjmp)
	movq	%rbx,0(%rdi)			/* save rbx */
	movq	%rsp,8(%rdi)			/* save rsp */
	movq	%rbp,16(%rdi)			/* save rbp */
	movq	%r12,24(%rdi)			/* save r12 */
	movq	%r13,32(%rdi)			/* save r13 */
	movq	%r14,40(%rdi)			/* save r14 */
	movq	%r15,48(%rdi)			/* save r15 */
	movq	0(%rsp),%rdx			/* get rta */
	movq	%rdx,56(%rdi)			/* save rip */
	xorl	%eax,%eax			/* return(0); */
	ret
END(setjmp)

ENTRY(longjmp)
	movq	0(%rdi),%rbx			/* restore rbx */
	movq	8(%rdi),%rsp			/* restore rsp */
	movq	16(%rdi),%rbp			/* restore rbp */
	movq	24(%rdi),%r12			/* restore r12 */
	movq	32(%rdi),%r13			/* restore r13 */
	movq	40(%rdi),%r14			/* restore r14 */
	movq	48(%rdi),%r15			/* restore r15 */
	movq	56(%rdi),%rdx			/* get rta */
	movq	%rdx,0(%rsp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
END(longjmp)

/*
 * Support for reading MSRs in a safe manner: instead of panicking on a
 * #GP fault, return an error.
 */
ENTRY(rdmsr_safe)
/* int rdmsr_safe(u_int msr, uint64_t *data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	PCB_ONFAULT(%r8),%r9
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	rdmsr			/* Read MSR pointed to by %ecx. Returns
				   high 32 bits in %edx, low in %eax */
	salq	$32,%rdx	/* shift the high half into bits 63:32 */
	movl	%eax,%eax	/* zero-extend %eax -> %rax */
	orq	%rdx,%rax
	movq	%rax,(%rsi)
	movq	%r9,PCB_ONFAULT(%r8)
	xorl	%eax,%eax
	POP_FRAME_POINTER
	ret

/*
 * Support for writing MSRs in a safe manner: instead of panicking on a
 * #GP fault, return an error.
 */
ENTRY(wrmsr_safe)
/* int wrmsr_safe(u_int msr, uint64_t data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	PCB_ONFAULT(%r8),%r9
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	movl	%esi,%eax
	sarq	$32,%rsi
	movl	%esi,%edx
	wrmsr			/* Write MSR pointed to by %ecx. Accepts
				   high 32 bits in %edx, low in %eax. */
	movq	%r9,PCB_ONFAULT(%r8)
	xorl	%eax,%eax
	POP_FRAME_POINTER
	ret

/*
 * MSR operations fault handler
 */
	ALIGN_TEXT
msr_onfault:
	movq	%r9,PCB_ONFAULT(%r8)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret

ENTRY(wrmsr_early_safe)
	movl	%edi,%ecx
	movl	%esi,%eax
	sarq	$32,%rsi
	movl	%esi,%edx
	wrmsr
	xorl	%eax,%eax
wrmsr_early_faulted:
	ret

ENTRY(wrmsr_early_safe_gp_handler)
	addq	$8,%rsp
	movl	$EFAULT,%eax
	movq	$wrmsr_early_faulted,(%rsp)
	iretq

/*
 * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
 * Invalidates address space addressed by ucr3, then returns to kcr3.
 * Done in assembler to ensure no other memory accesses happen while
 * on ucr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invalidate)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
 * Invalidates virtual address va in address space ucr3, then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlpg)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	invlpg	(%rdx)
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
 *     vm_offset_t eva);
 * Invalidates virtual addresses between sva and eva in address space ucr3,
 * then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlrng)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
1:	invlpg	(%rdx)
	addq	$PAGE_SIZE,%rdx
	cmpq	%rdx,%rcx
	ja	1b
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

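/*
 * Fill the Return Stack Buffer with benign entries: each rsb_seq
 * iteration pushes a return address with a call and then discards the
 * stack slot, leaving an RSB entry pointing at harmless code.  This
 * keeps later ret instructions from speculating to attacker-primed
 * RSB entries.
 */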
	.altmacro
	.macro	rsb_seq_label l
rsb_seq_\l:
	.endm
	.macro	rsb_call_label l
	call	rsb_seq_\l
	.endm
	.macro	rsb_seq count
	ll=1
	.rept	\count
	rsb_call_label	%(ll)
	nop
	rsb_seq_label %(ll)
	addq	$8,%rsp
	ll=ll+1
	.endr
	.endm

ENTRY(rsb_flush)
	rsb_seq	32
	ret

/* all callers already saved %rax, %rdx, and %rcx */
ENTRY(handle_ibrs_entry)
	cmpb	$0,hw_ibrs_ibpb_active(%rip)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx
	wrmsr
	movb	$1,PCPU(IBPB_SET)
	testl	$CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip)
	je	rsb_flush
1:	ret
END(handle_ibrs_entry)

ENTRY(handle_ibrs_exit)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit)

/* registers-neutral version, but needs stack */
ENTRY(handle_ibrs_exit_rs)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	popq	%rcx
	popq	%rdx
	popq	%rax
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit_rs)

	.noaltmacro

/*
 * Flush the L1D cache.  Load enough data from the kernel text to evict
 * the existing L1D content.
 *
 * N.B. The function does not follow the ABI calling conventions: it
 * corrupts %rbx.  The vmm.ko caller expects that only %rax, %rdx, %rbx,
 * %rcx, %r9, and %rflags registers are clobbered.  The NMI handler
 * caller only needs %r13 and %r15 preserved.
 */
ENTRY(flush_l1d_sw)
#define	L1D_FLUSH_SIZE	(64 * 1024)
	movq	$KERNBASE, %r9
	movq	$-L1D_FLUSH_SIZE, %rcx
	/*
	 * pass 1: Preload TLB.
	 * Kernel text is mapped using superpages.  TLB preload is
	 * done for the benefit of older CPUs which split 2M page
	 * into 4k TLB entries.
	 */
1:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$PAGE_SIZE, %rcx
	jne	1b
	xorl	%eax, %eax
	cpuid
	movq	$-L1D_FLUSH_SIZE, %rcx
	/* pass 2: Read each cache line. */
2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$64, %rcx
	jne	2b
	lfence
	ret
#undef	L1D_FLUSH_SIZE
END(flush_l1d_sw)

ENTRY(flush_l1d_sw_abi)
	pushq	%rbx
	call	flush_l1d_sw
	popq	%rbx
	ret
END(flush_l1d_sw_abi)

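/*
 * MDS (Microarchitectural Data Sampling) mitigation handlers.  Each
 * variant implements the software sequence recommended for its CPU
 * family to overwrite the affected store, load and fill buffers; the
 * kernel selects one of them at boot based on the CPU model.
 */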
ENTRY(mds_handler_void)
	retq
END(mds_handler_void)

ENTRY(mds_handler_verw)
	subq	$8, %rsp
	movw	%ds, (%rsp)
	verw	(%rsp)
	addq	$8, %rsp
	retq
END(mds_handler_verw)

ENTRY(mds_handler_ivb)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0
	orpd	(%rdx), %xmm0
	mfence
	movl	$40, %ecx
	addq	$16, %rdx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_ivb)

ENTRY(mds_handler_bdw)
	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	pushq	%rdi
	pushq	%rsi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rbx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	movq	%rbx, %rdi
	movq	%rbx, %rsi
	movl	$40, %ecx
2:	movntdq	%xmm0, (%rbx)
	addq	$16, %rbx
	decl	%ecx
	jnz	2b
	mfence
	movl	$1536, %ecx
	rep; movsb
	lfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rsi
	popq	%rdi
	popq	%rcx
	popq	%rbx
	popq	%rax
	retq
END(mds_handler_bdw)

ENTRY(mds_handler_skl_sse)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0
	orpd	(%rdx), %xmm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	movdqa	PCPU(MDS_TMP), %xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_sse)

ENTRY(mds_handler_skl_avx)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa	%ymm0, PCPU(MDS_TMP)
	vpxor	%ymm0, %ymm0, %ymm0

	lfence
	vorpd	(%rdx), %ymm0, %ymm0
	vorpd	(%rdx), %ymm0, %ymm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	vmovdqa	PCPU(MDS_TMP), %ymm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx)

ENTRY(mds_handler_skl_avx512)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa64	%zmm0, PCPU(MDS_TMP)
	vpxord	%zmm0, %zmm0, %zmm0

	lfence
	vorpd	(%rdx), %zmm0, %zmm0
	vorpd	(%rdx), %zmm0, %zmm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	vmovdqa64	PCPU(MDS_TMP), %zmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx512)

ENTRY(mds_handler_silvermont)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	movl	$16, %ecx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_silvermont)

/*
 * Do the same as Linux and execute IRET explicitly, even though the
 * IPI return path performs one as well.
 */
ENTRY(cpu_sync_core)
/*
 * This can use SERIALIZE once the instruction moves from the 'future
 * extensions' documents into the SDM.
 */
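	/*
	 * Build an IRET frame (SS, RSP, RFLAGS, CS, RIP) that returns
	 * to our own caller: %rdx holds the return address fetched from
	 * the stack, and the saved RSP is adjusted to skip over it.
	 */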
	movq	(%rsp), %rdx
	movl	%ss, %eax
	pushq	%rax
	pushq	%rsp
	addq	$16, (%rsp)
	pushfq
	movl	%cs, %eax
	pushq	%rax
	pushq	%rdx
	iretq
END(cpu_sync_core)