xref: /titanic_41/usr/src/uts/intel/ia32/ml/i86_subr.s (revision edf70dc9b8e373c558a49c15c2d86be817d497fe)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 *  Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
29 *  Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T
30 *    All Rights Reserved
31 */
32
33/*
34 * General assembly language routines.
35 * It is the intent of this file to contain routines that are
36 * independent of the specific kernel architecture, and those that are
37 * common across kernel architectures.
38 * As architectures diverge, and implementations of specific
39 * architecture-dependent routines change, the routines should be moved
40 * from this file into the respective ../`arch -k`/subr.s file.
41 */
42
43#include <sys/asm_linkage.h>
44#include <sys/asm_misc.h>
45#include <sys/panic.h>
46#include <sys/ontrap.h>
47#include <sys/regset.h>
48#include <sys/privregs.h>
49#include <sys/reboot.h>
50#include <sys/psw.h>
51#include <sys/x86_archext.h>
52
53#if defined(__lint)
54#include <sys/types.h>
55#include <sys/systm.h>
56#include <sys/thread.h>
57#include <sys/archsystm.h>
58#include <sys/byteorder.h>
59#include <sys/dtrace.h>
60#include <sys/ftrace.h>
61#else	/* __lint */
62#include "assym.h"
63#endif	/* __lint */
64#include <sys/dditypes.h>
65
66/*
67 * on_fault()
68 * Catch lofault faults. Like setjmp except it returns one
69 * if code following causes uncorrectable fault. Turned off
70 * by calling no_fault().
71 */
72
#if defined(__lint)

/* ARGSUSED */
int
on_fault(label_t *ljb)
{
	return (0);
}

void
no_fault(void)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_fault(label_t *ljb)
	 *
	 * In:	%rdi = ljb
	 *
	 * Records ljb in curthread->t_onfault, points curthread->t_lofault
	 * at catch_fault, and then tail-calls setjmp(ljb), so the caller
	 * sees setjmp's return value.
	 */
	ENTRY(on_fault)
	leaq	catch_fault(%rip), %rdx		/* %rdx = &catch_fault */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rdi, T_ONFAULT(%rsi)		/* t_onfault = ljb */
	movq	%rdx, T_LOFAULT(%rsi)		/* t_lofault = catch_fault */
	jmp	setjmp				/* setjmp(ljb) finishes the job */

	/*
	 * Installed as t_lofault by on_fault().  Clears both t_onfault and
	 * t_lofault and longjmp()s to the jump buffer that was recorded.
	 */
catch_fault:
	xorl	%eax, %eax			/* %rax = 0 */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONFAULT(%rsi), %rdi		/* %rdi = saved jump buffer */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = NULL */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = NULL */
	jmp	longjmp				/* longjmp(jump buffer) */
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Disarms on_fault(): zeroes curthread->t_onfault and t_lofault.
	 */
	ENTRY(no_fault)
	xorl	%eax, %eax
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = NULL */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = NULL */
	ret
	SET_SIZE(no_fault)

#elif defined(__i386)

	/*
	 * int on_fault(label_t *ljb)
	 *
	 * In:	4(%esp) = ljb
	 *
	 * Same contract as the amd64 version; the argument is left on the
	 * stack so that setjmp() picks it up after the jump.
	 */
	ENTRY(on_fault)
	movl	4(%esp), %eax			/* %eax = ljb */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	leal	catch_fault, %ecx		/* %ecx = &catch_fault */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = ljb */
	movl	%ecx, T_LOFAULT(%edx)		/* t_lofault = catch_fault */
	jmp	setjmp				/* setjmp(ljb) finishes the job */

	/*
	 * Installed as t_lofault by on_fault().  Clears both t_onfault and
	 * t_lofault and longjmp()s to the jump buffer that was recorded.
	 */
catch_fault:
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONFAULT(%edx), %ecx		/* %ecx = saved jump buffer */
	xorl	%eax, %eax
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = NULL */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = NULL */
	pushl	%ecx				/* argument for longjmp */
	call	longjmp				/* does not come back here */
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Disarms on_fault(): zeroes curthread->t_onfault and t_lofault.
	 */
	ENTRY(no_fault)
	xorl	%eax, %eax
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = NULL */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = NULL */
	ret
	SET_SIZE(no_fault)

#endif	/* __i386 */
#endif	/* __lint */
142
143/*
144 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  We just
145 * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called.
146 */
147
/*
 * Guarded by __lint (not lint) so that this routine follows the same switch
 * that decides, at the top of the file, whether "assym.h" is included.
 */
#if defined(__lint)

void
on_trap_trampoline(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * Computes &curthread->t_ontrap->ot_jmpbuf in %rdi and tail-calls
	 * longjmp() on it.
	 */
	ENTRY(on_trap_trampoline)
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONTRAP(%rsi), %rdi		/* %rdi = curthread->t_ontrap */
	addq	$OT_JMPBUF, %rdi		/* %rdi = &t_ontrap->ot_jmpbuf */
	jmp	longjmp
	SET_SIZE(on_trap_trampoline)

#elif defined(__i386)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * Computes &curthread->t_ontrap->ot_jmpbuf, pushes it as the
	 * argument and calls longjmp().
	 */
	ENTRY(on_trap_trampoline)
	movl	%gs:CPU_THREAD, %eax		/* %eax = curthread */
	movl	T_ONTRAP(%eax), %eax		/* %eax = curthread->t_ontrap */
	addl	$OT_JMPBUF, %eax		/* %eax = &t_ontrap->ot_jmpbuf */
	pushl	%eax				/* argument for longjmp */
	call	longjmp
	SET_SIZE(on_trap_trampoline)

#endif	/* __i386 */
#endif	/* __lint */
177
178/*
179 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
180 * more information about the on_trap() mechanism.  If the on_trap_data is the
181 * same as the topmost stack element, we just modify that element.
182 */
/*
 * Guarded by __lint (not lint) so that this routine follows the same switch
 * that decides, at the top of the file, whether "assym.h" is included.
 */
#if defined(__lint)

/*ARGSUSED*/
int
on_trap(on_trap_data_t *otp, uint_t prot)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *
	 * In:	%rdi = otp, %esi = prot
	 *
	 * Initializes *otp (ot_prot, ot_trap, ot_trampoline, ot_handle,
	 * ot_pad1).  Unless otp is already curthread->t_ontrap, links it in
	 * front of the current element.  Finishes by tail-calling
	 * setjmp(&otp->ot_jmpbuf), whose return value the caller sees.
	 */
	ENTRY(on_trap)
	movw	%si, OT_PROT(%rdi)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%rdi)		/* ot_trap = 0 */
	leaq	on_trap_trampoline(%rip), %rdx	/* rdx = &on_trap_trampoline */
	movq	%rdx, OT_TRAMPOLINE(%rdi)	/* ot_trampoline = rdx */
	xorl	%ecx, %ecx
	movq	%rcx, OT_HANDLE(%rdi)		/* ot_handle = NULL */
	movq	%rcx, OT_PAD1(%rdi)		/* ot_pad1 = NULL */
	movq	%gs:CPU_THREAD, %rdx		/* rdx = curthread */
	movq	T_ONTRAP(%rdx), %rcx		/* rcx = curthread->t_ontrap */
	cmpq	%rdi, %rcx			/* if (otp == %rcx)	*/
	je	0f				/*	don't modify t_ontrap */

	movq	%rcx, OT_PREV(%rdi)		/* ot_prev = t_ontrap */
	movq	%rdi, T_ONTRAP(%rdx)		/* curthread->t_ontrap = otp */

0:	addq	$OT_JMPBUF, %rdi		/* &ot_jmpbuf */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#elif defined(__i386)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *
	 * In:	4(%esp) = otp, 8(%esp) = prot
	 *
	 * Same contract as the amd64 version.  The first stack argument is
	 * overwritten with &otp->ot_jmpbuf before jumping to setjmp().
	 */
	ENTRY(on_trap)
	movl	4(%esp), %eax			/* %eax = otp */
	movl	8(%esp), %edx			/* %edx = prot */

	movw	%dx, OT_PROT(%eax)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%eax)		/* ot_trap = 0 */
	leal	on_trap_trampoline, %edx	/* %edx = &on_trap_trampoline */
	movl	%edx, OT_TRAMPOLINE(%eax)	/* ot_trampoline = %edx */
	movl	$0, OT_HANDLE(%eax)		/* ot_handle = NULL */
	movl	$0, OT_PAD1(%eax)		/* ot_pad1 = NULL */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONTRAP(%edx), %ecx		/* %ecx = curthread->t_ontrap */
	cmpl	%eax, %ecx			/* if (otp == %ecx) */
	je	0f				/*    don't modify t_ontrap */

	movl	%ecx, OT_PREV(%eax)		/* ot_prev = t_ontrap */
	movl	%eax, T_ONTRAP(%edx)		/* curthread->t_ontrap = otp */

0:	addl	$OT_JMPBUF, %eax		/* %eax = &ot_jmpbuf */
	movl	%eax, 4(%esp)			/* put %eax back on the stack */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#endif	/* __i386 */
#endif	/* __lint */
241
242/*
243 * Setjmp and longjmp implement non-local gotos using state vectors
244 * type label_t.
245 */
246
#if defined(__lint)

/* ARGSUSED */
int
setjmp(label_t *lp)
{
	return (0);
}

/* ARGSUSED */
void
longjmp(label_t *lp)
{
}

#else	/* __lint */

#if LABEL_PC != 0
#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
#endif	/* LABEL_PC != 0 */

#if defined(__amd64)

	/*
	 * int setjmp(label_t *lp)
	 *
	 * In:	%rdi = lp
	 * Out:	%eax = 0
	 *
	 * Stores the return address, %rsp, %rbp, %rbx and %r12-%r15 in *lp.
	 * The return address goes at offset 0, which is LABEL_PC.
	 */
	ENTRY(setjmp)
	movq	(%rsp), %rdx		/* %rdx = our return address */
	movq	%rdx, (%rdi)		/* saved at LABEL_PC (offset 0) */
	movq	%rsp, LABEL_SP(%rdi)
	movq	%rbp, LABEL_RBP(%rdi)
	movq	%rbx, LABEL_RBX(%rdi)
	movq	%r12, LABEL_R12(%rdi)
	movq	%r13, LABEL_R13(%rdi)
	movq	%r14, LABEL_R14(%rdi)
	movq	%r15, LABEL_R15(%rdi)
	xorl	%eax, %eax		/* direct call returns 0 */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)
	 *
	 * In:	%rdi = lp
	 *
	 * Reloads the registers saved by setjmp(), plants the saved return
	 * address on top of the restored stack and returns to it with
	 * %eax = 1.
	 */
	ENTRY(longjmp)
	movq	LABEL_SP(%rdi), %rsp
	movq	(%rdi), %rdx		/* saved PC (LABEL_PC is offset 0) */
	movq	%rdx, (%rsp)		/* becomes our return address */
	movq	LABEL_RBP(%rdi), %rbp
	movq	LABEL_RBX(%rdi), %rbx
	movq	LABEL_R12(%rdi), %r12
	movq	LABEL_R13(%rdi), %r13
	movq	LABEL_R14(%rdi), %r14
	movq	LABEL_R15(%rdi), %r15
	movl	$1, %eax		/* setjmp() appears to return 1 */
	ret
	SET_SIZE(longjmp)

#elif defined(__i386)

	/*
	 * int setjmp(label_t *lp)
	 *
	 * In:	4(%esp) = lp
	 * Out:	%eax = 0
	 *
	 * Stores the return address (offset 0, LABEL_PC), %esp (offset 4),
	 * %ebp, %ebx, %esi and %edi in *lp.
	 */
	ENTRY(setjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	(%esp), %ecx		/* %ecx = our return address */
	movl	%ecx, (%edx)		/* saved at LABEL_PC (offset 0) */
	movl	%esp, 4(%edx)
	movl	%ebp, LABEL_EBP(%edx)
	movl	%ebx, LABEL_EBX(%edx)
	movl	%esi, LABEL_ESI(%edx)
	movl	%edi, LABEL_EDI(%edx)
	xorl	%eax, %eax		/* direct call returns 0 */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)
	 *
	 * In:	4(%esp) = lp
	 *
	 * Reloads the registers saved by setjmp(), discards the return
	 * address slot on the restored stack and jumps to the saved PC with
	 * %eax = 1.
	 */
	ENTRY(longjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	$1, %eax		/* setjmp() appears to return 1 */
	movl	(%edx), %ecx		/* saved PC (LABEL_PC is offset 0) */
	movl	LABEL_EBP(%edx), %ebp
	movl	LABEL_EBX(%edx), %ebx
	movl	LABEL_ESI(%edx), %esi
	movl	LABEL_EDI(%edx), %edi
	movl	4(%edx), %esp
	addl	$4, %esp		/* drop the return address slot */
	jmp	*%ecx			/* resume at the saved PC */
	SET_SIZE(longjmp)

#endif	/* __i386 */
#endif	/* __lint */
326
327/*
328 * if a() calls b() calls caller(),
329 * caller() returns return address in a().
330 * (Note: We assume a() and b() are C routines which do the normal entry/exit
331 *  sequence.)
332 */
333
#if defined(__lint)

caddr_t
caller(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t caller(void)
	 *
	 * Out:	%rax = the word just above the frame pointer, i.e. the return
	 *	address stored in the frame that %rbp currently points at.
	 * No frame is built here, so %rbp still belongs to our caller.
	 */
	ENTRY(caller)
	movq	8(%rbp), %rax		/* return pc of the function that */
	ret				/* called us */
	SET_SIZE(caller)

#elif defined(__i386)

	/*
	 * caddr_t caller(void)
	 *
	 * Out:	%eax = the word just above the frame pointer, i.e. the return
	 *	address stored in the frame that %ebp currently points at.
	 */
	ENTRY(caller)
	movl	4(%ebp), %eax		/* return pc of the function that */
	ret				/* called us */
	SET_SIZE(caller)

#endif	/* __i386 */
#endif	/* __lint */
358
359/*
360 * if a() calls callee(), callee() returns the
361 * return address in a();
362 */
363
#if defined(__lint)

caddr_t
callee(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t callee(void)
	 *
	 * Out:	%rax = our own return address, read from the top of the stack.
	 */
	ENTRY(callee)
	movq	(%rsp), %rax		/* pc we will return to */
	ret
	SET_SIZE(callee)

#elif defined(__i386)

	/*
	 * caddr_t callee(void)
	 *
	 * Out:	%eax = our own return address, read from the top of the stack.
	 */
	ENTRY(callee)
	movl	(%esp), %eax		/* pc we will return to */
	ret
	SET_SIZE(callee)

#endif	/* __i386 */
#endif	/* __lint */
388
389/*
390 * return the current frame pointer
391 */
392
#if defined(__lint)

greg_t
getfp(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * greg_t getfp(void)
	 *
	 * Out:	%rax = current value of %rbp (no frame is set up here).
	 */
	ENTRY(getfp)
	movq	%rbp, %rax
	ret
	SET_SIZE(getfp)

#elif defined(__i386)

	/*
	 * greg_t getfp(void)
	 *
	 * Out:	%eax = current value of %ebp (no frame is set up here).
	 */
	ENTRY(getfp)
	movl	%ebp, %eax
	ret
	SET_SIZE(getfp)

#endif	/* __i386 */
#endif	/* __lint */
417
418/*
419 * Invalidate a single page table entry in the TLB
420 */
421
#if defined(__lint)

/* ARGSUSED */
void
mmu_tlbflush_entry(caddr_t m)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)
	 *
	 * In:	%rdi = m
	 * Executes invlpg on the address m.
	 */
	ENTRY(mmu_tlbflush_entry)
	invlpg	(%rdi)
	ret
	SET_SIZE(mmu_tlbflush_entry)

#elif defined(__i386)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)
	 *
	 * In:	4(%esp) = m
	 * Executes invlpg on the address m.
	 */
	ENTRY(mmu_tlbflush_entry)
	movl	4(%esp), %eax		/* %eax = m */
	invlpg	(%eax)
	ret
	SET_SIZE(mmu_tlbflush_entry)

#endif	/* __i386 */
#endif	/* __lint */
448
449
450/*
451 * Get/Set the value of various control registers
452 */
453
#if defined(__lint)

ulong_t
getcr0(void)
{
	return (0);
}

/* ARGSUSED */
void
setcr0(ulong_t value)
{
}

ulong_t
getcr2(void)
{
	return (0);
}

ulong_t
getcr3(void)
{
	return (0);
}

#if !defined(__xpv)
/* ARGSUSED */
void
setcr3(ulong_t val)
{
}

void
reload_cr3(void)
{
}
#endif

ulong_t
getcr4(void)
{
	return (0);
}

/* ARGSUSED */
void
setcr4(ulong_t val)
{
}

#if defined(__amd64)

ulong_t
getcr8(void)
{
	return (0);
}

/* ARGSUSED */
void
setcr8(ulong_t val)
{
}

#endif	/* __amd64 */

#else	/* __lint */

#if defined(__amd64)

	/*
	 * amd64 accessors.  Getters return the register in %rax; setters
	 * take the new value in %rdi.
	 */

	ENTRY(getcr0)
	movq	%cr0, %rax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movq	%rdi, %cr0
	ret
	SET_SIZE(setcr0)

	/*
	 * With __xpv defined the value comes from the per-CPU vcpu_info
	 * structure rather than from %cr2 itself.
	 */
	ENTRY(getcr2)
#if defined(__xpv)
	movq	%gs:CPU_VCPU_INFO, %rax
	movq	VCPU_INFO_ARCH_CR2(%rax), %rax
#else
	movq	%cr2, %rax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movq	%cr3, %rax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movq	%rdi, %cr3
	ret
	SET_SIZE(setcr3)

	/* Writes %cr3 back with the value just read from it. */
	ENTRY(reload_cr3)
	movq	%cr3, %rdi
	movq	%rdi, %cr3
	ret
	SET_SIZE(reload_cr3)

#endif	/* __xpv */

	ENTRY(getcr4)
	movq	%cr4, %rax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movq	%rdi, %cr4
	ret
	SET_SIZE(setcr4)

	ENTRY(getcr8)
	movq	%cr8, %rax
	ret
	SET_SIZE(getcr8)

	ENTRY(setcr8)
	movq	%rdi, %cr8
	ret
	SET_SIZE(setcr8)

#elif defined(__i386)

	/*
	 * i386 accessors.  Getters return the register in %eax; setters
	 * take the new value at 4(%esp).
	 */

	ENTRY(getcr0)
	movl	%cr0, %eax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movl	4(%esp), %eax
	movl	%eax, %cr0
	ret
	SET_SIZE(setcr0)

	/*
	 * With __xpv defined the value comes from the per-CPU vcpu_info
	 * structure rather than from %cr2 itself.
	 */
	ENTRY(getcr2)
#if defined(__xpv)
	movl	%gs:CPU_VCPU_INFO, %eax
	movl	VCPU_INFO_ARCH_CR2(%eax), %eax
#else
	movl	%cr2, %eax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movl	%cr3, %eax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movl	4(%esp), %eax
	movl	%eax, %cr3
	ret
	SET_SIZE(setcr3)

	/* Writes %cr3 back with the value just read from it. */
	ENTRY(reload_cr3)
	movl	%cr3, %eax
	movl	%eax, %cr3
	ret
	SET_SIZE(reload_cr3)

#endif	/* __xpv */

	ENTRY(getcr4)
	movl	%cr4, %eax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movl	4(%esp), %eax
	movl	%eax, %cr4
	ret
	SET_SIZE(setcr4)

#endif	/* __i386 */
#endif	/* __lint */
627
#if defined(__lint)

/*ARGSUSED*/
uint32_t
__cpuid_insn(struct cpuid_regs *regs)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint32_t __cpuid_insn(struct cpuid_regs *regs)
	 *
	 * In:	%rdi = regs
	 * Out:	%eax = the %eax value produced by cpuid
	 *
	 * Loads %eax/%ebx/%ecx/%edx from the four 32-bit words at regs,
	 * executes cpuid and stores the four results back in place.
	 * %rbx, %rcx and %rdx are parked in %r8, %r9 and %r11 and put back
	 * before returning.
	 */
	ENTRY(__cpuid_insn)
	movq	%rdx, %r11		/* park %rdx */
	movq	%rcx, %r9		/* park %rcx */
	movq	%rbx, %r8		/* park %rbx */
	movl	0xc(%rdi), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%rdi), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%rdi), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%rdi), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%eax, (%rdi)		/* regs->cp_eax = %eax */
	movl	%ebx, 0x4(%rdi)		/* regs->cp_ebx = %ebx */
	movl	%ecx, 0x8(%rdi)		/* regs->cp_ecx = %ecx */
	movl	%edx, 0xc(%rdi)		/* regs->cp_edx = %edx */
	movq	%r11, %rdx		/* bring the parked values back */
	movq	%r9, %rcx
	movq	%r8, %rbx
	ret
	SET_SIZE(__cpuid_insn)

#elif defined(__i386)

	/*
	 * uint32_t __cpuid_insn(struct cpuid_regs *regs)
	 *
	 * In:	4(%esp) on entry = regs
	 * Out:	%eax = the %eax value produced by cpuid
	 *
	 * %ebp is used as the pointer to regs; %ebp, %ebx, %ecx and %edx
	 * are saved on the stack and restored before returning.
	 */
	ENTRY(__cpuid_insn)
	pushl	%ebp
	movl	0x8(%esp), %ebp		/* %ebp = regs (past saved %ebp) */
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	movl	0xc(%ebp), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%ebp), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%ebp), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%ebp), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%eax, (%ebp)		/* regs->cp_eax = %eax */
	movl	%ebx, 0x4(%ebp)		/* regs->cp_ebx = %ebx */
	movl	%ecx, 0x8(%ebp)		/* regs->cp_ecx = %ecx */
	movl	%edx, 0xc(%ebp)		/* regs->cp_edx = %edx */
	popl	%edx
	popl	%ecx
	popl	%ebx
	popl	%ebp
	ret
	SET_SIZE(__cpuid_insn)

#endif	/* __i386 */
#endif	/* __lint */
684
#if defined(__lint)

/*ARGSUSED*/
void
i86_monitor(volatile uint32_t *addr, uint32_t extensions, uint32_t hints)
{
}

#else   /* __lint */

#if defined(__amd64)

	/*
	 * void i86_monitor(volatile uint32_t *addr, uint32_t extensions,
	 *     uint32_t hints)
	 *
	 * In:	%rdi = addr, %esi = extensions, %edx = hints
	 *
	 * Flushes the cache line at addr, then issues MONITOR with
	 * %rax = addr, %ecx = extensions, %edx = hints.  The instruction is
	 * emitted as raw bytes.
	 */
	ENTRY_NP(i86_monitor)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rsi, %rcx		/* %rcx = extensions */
	movq	%rdi, %rax		/* %rax = addr */
	/* hints are already in %rdx (third argument) */
	clflush	(%rax)
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

#elif defined(__i386)

	/*
	 * void i86_monitor(volatile uint32_t *addr, uint32_t extensions,
	 *     uint32_t hints)
	 *
	 * Arguments are read from the frame at 8/12/16(%ebp).  Flushes the
	 * cache line at addr, then issues MONITOR with %eax = addr,
	 * %ecx = extensions, %edx = hints.
	 */
	ENTRY_NP(i86_monitor)
	pushl	%ebp
	movl	%esp, %ebp
	movl	0x10(%ebp), %edx	/* %edx = hints */
	movl	0xc(%ebp), %ecx		/* %ecx = extensions */
	movl	0x8(%ebp), %eax		/* %eax = addr */
	clflush	(%eax)
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

#endif	/* __i386 */
#endif	/* __lint */
724
#if defined(__lint)

/*ARGSUSED*/
void
i86_mwait(uint32_t data, uint32_t extensions)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void i86_mwait(uint32_t data, uint32_t extensions)
	 *
	 * In:	%edi = data, %esi = extensions
	 *
	 * Issues MWAIT with %eax = data and %ecx = extensions.  The
	 * instruction is emitted as raw bytes.
	 */
	ENTRY_NP(i86_mwait)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rsi, %rcx		/* %rcx = extensions */
	movq	%rdi, %rax		/* %rax = data */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#elif defined(__i386)

	/*
	 * void i86_mwait(uint32_t data, uint32_t extensions)
	 *
	 * Arguments are read from the frame at 8/12(%ebp).  Issues MWAIT
	 * with %eax = data and %ecx = extensions.
	 */
	ENTRY_NP(i86_mwait)
	pushl	%ebp
	movl	%esp, %ebp
	movl	0xc(%ebp), %ecx		/* %ecx = extensions */
	movl	0x8(%ebp), %eax		/* %eax = data */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#endif	/* __i386 */
#endif	/* __lint */
760
#if defined(__xpv)
	/*
	 * Defined in C
	 */
#else

#if defined(__lint)

hrtime_t
tsc_read(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * hrtime_t tsc_read(void)
	 *
	 * Out:	%rax = (%rdx << 32) | %rax as produced by rdtsc.
	 *
	 * The default body runs cpuid (leaf 0) ahead of rdtsc; cpuid
	 * overwrites %rbx, which is therefore parked in %r11.
	 *
	 * The code between each _<name>_start/_<name>_end pair of global
	 * labels is a self-contained alternative body (mfence, rdtscp,
	 * no-TSC, lfence).  NOTE(review): these look like templates copied
	 * over tsc_read at run time; confirm against the TSC setup code
	 * before changing any instruction or its size.
	 */
	ENTRY_NP(tsc_read)
	movq	%rbx, %r11
	movl	$0, %eax
	cpuid
	rdtsc
	movq	%r11, %rbx
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret

	/* Alternative body: mfence ahead of rdtsc. */
	.globl _tsc_mfence_start
_tsc_mfence_start:
	mfence
	rdtsc
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tsc_mfence_end
_tsc_mfence_end:

	/* Alternative body: rdtscp, emitted as raw bytes. */
	.globl _tscp_start
_tscp_start:
	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tscp_end
_tscp_end:

	/* Alternative body: no TSC instruction at all; returns 0. */
	.globl _no_rdtsc_start
_no_rdtsc_start:
	xorl	%edx, %edx
	xorl	%eax, %eax
	ret
	.globl _no_rdtsc_end
_no_rdtsc_end:

	/* Alternative body: lfence ahead of rdtsc. */
	.globl _tsc_lfence_start
_tsc_lfence_start:
	lfence
	rdtsc
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tsc_lfence_end
_tsc_lfence_end:
	SET_SIZE(tsc_read)

#else /* __i386 */

	/*
	 * hrtime_t tsc_read(void)
	 *
	 * Out:	%edx:%eax as produced by rdtsc.
	 *
	 * Same layout as the amd64 version: a default body that runs cpuid
	 * (leaf 0) ahead of rdtsc with %ebx saved on the stack, followed by
	 * the alternative bodies delimited by global start/end labels.
	 * NOTE(review): presumably run-time patch templates; confirm before
	 * changing any instruction or its size.
	 */
	ENTRY_NP(tsc_read)
	pushl	%ebx
	movl	$0, %eax
	cpuid
	rdtsc
	popl	%ebx
	ret

	/* Alternative body: mfence ahead of rdtsc. */
	.globl _tsc_mfence_start
_tsc_mfence_start:
	mfence
	rdtsc
	ret
	.globl _tsc_mfence_end
_tsc_mfence_end:

	/* Alternative body: rdtscp, emitted as raw bytes. */
	.globl	_tscp_start
_tscp_start:
	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
	ret
	.globl _tscp_end
_tscp_end:

	/* Alternative body: no TSC instruction at all; returns 0. */
	.globl _no_rdtsc_start
_no_rdtsc_start:
	xorl	%edx, %edx
	xorl	%eax, %eax
	ret
	.globl _no_rdtsc_end
_no_rdtsc_end:

	/* Alternative body: lfence ahead of rdtsc. */
	.globl _tsc_lfence_start
_tsc_lfence_start:
	lfence
	rdtsc
	ret
	.globl _tsc_lfence_end
_tsc_lfence_end:
	SET_SIZE(tsc_read)

#endif	/* __i386 */

#endif	/* __lint */


#endif	/* __xpv */
867
#ifdef __lint
/*
 * Do not use this function for obtaining clock tick.  This
 * is called by callers who do not need to have a guaranteed
 * correct tick value.  The proper routine to use is tsc_read().
 */
hrtime_t
randtick(void)
{
	return (0);
}
#else
#if defined(__amd64)
	/*
	 * hrtime_t randtick(void)
	 *
	 * Out:	%rax = (%rdx << 32) | %rax from a bare rdtsc, with no
	 *	fencing or serializing instruction ahead of it.
	 */
	ENTRY_NP(randtick)
	rdtsc
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	SET_SIZE(randtick)
#else
	/*
	 * hrtime_t randtick(void)
	 *
	 * Out:	%edx:%eax from a bare rdtsc.
	 */
	ENTRY_NP(randtick)
	rdtsc
	ret
	SET_SIZE(randtick)
#endif /* __i386 */
#endif /* __lint */
894/*
895 * Insert entryp after predp in a doubly linked list.
896 */
897
#if defined(__lint)

/*ARGSUSED*/
void
_insque(caddr_t entryp, caddr_t predp)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *
	 * In:	%rdi = entryp, %rsi = predp
	 *
	 * Each element starts with a forward pointer at offset 0 and a
	 * back pointer at offset CPTRSIZE.  succ below is the old
	 * predp->forw.
	 */
	ENTRY(_insque)
	movq	(%rsi), %rax		/* %rax = succ			*/
	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp		*/
	movq	%rax, (%rdi)		/* entryp->forw = succ		*/
	movq	%rdi, (%rsi)		/* predp->forw = entryp		*/
	movq	%rdi, CPTRSIZE(%rax)	/* succ->back = entryp		*/
	ret
	SET_SIZE(_insque)

#elif defined(__i386)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *
	 * In:	4(%esp) = entryp, 8(%esp) = predp
	 *
	 * Same list layout and store sequence as the amd64 version.
	 */
	ENTRY(_insque)
	movl	4(%esp), %ecx		/* %ecx = entryp		*/
	movl	8(%esp), %edx		/* %edx = predp			*/
	movl	(%edx), %eax		/* %eax = succ			*/
	movl	%edx, CPTRSIZE(%ecx)	/* entryp->back = predp		*/
	movl	%eax, (%ecx)		/* entryp->forw = succ		*/
	movl	%ecx, (%edx)		/* predp->forw = entryp		*/
	movl	%ecx, CPTRSIZE(%eax)	/* succ->back = entryp		*/
	ret
	SET_SIZE(_insque)

#endif	/* __i386 */
#endif	/* __lint */
933
934/*
935 * Remove entryp from a doubly linked list
936 */
937
#if defined(__lint)

/*ARGSUSED*/
void
_remque(caddr_t entryp)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _remque(caddr_t entryp)
	 *
	 * In:	%rdi = entryp
	 *
	 * Links entryp's neighbours to each other.  entryp's own forward
	 * (offset 0) and back (offset CPTRSIZE) pointers are left as is.
	 */
	ENTRY(_remque)
	movq	CPTRSIZE(%rdi), %rdx	/* %rdx = entry->back */
	movq	(%rdi), %rax		/* %rax = entry->forw */
	movq	%rax, (%rdx)		/* entry->back->forw = entry->forw */
	movq	%rdx, CPTRSIZE(%rax)	/* entry->forw->back = entry->back */
	ret
	SET_SIZE(_remque)

#elif defined(__i386)

	/*
	 * void _remque(caddr_t entryp)
	 *
	 * In:	4(%esp) = entryp
	 *
	 * Same behaviour as the amd64 version.
	 */
	ENTRY(_remque)
	movl	4(%esp), %ecx		/* %ecx = entryp */
	movl	CPTRSIZE(%ecx), %edx	/* %edx = entry->back */
	movl	(%ecx), %eax		/* %eax = entry->forw */
	movl	%eax, (%edx)		/* entry->back->forw = entry->forw */
	movl	%edx, CPTRSIZE(%eax)	/* entry->forw->back = entry->back */
	ret
	SET_SIZE(_remque)

#endif	/* __i386 */
#endif	/* __lint */
970
/*
 * Returns the number of bytes in the string argument that precede
 * its terminating NUL byte.
 */
975
#if defined(__lint)

/* ARGSUSED */
size_t
strlen(const char *str)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

/*
 * size_t strlen(const char *s)
 *
 * In:	%rdi = s
 * Out:	%rax = number of bytes before the first zero byte
 *
 * Byte-at-a-time scan, equivalent to:
 *
 *	const char *s0;
 * #if defined(DEBUG)
 *	if ((uintptr_t)s < postbootkernelbase)
 *		panic(.str_panic_msg);
 * #endif
 *	for (s0 = s; *s; s++)
 *		;
 *	return (s - s0);
 *
 * This is close to a transliteration of the C above; it should either
 * become C or be made fast enough to justify staying in assembler.
 */

	ENTRY(strlen)
#ifdef DEBUG
	movq	postbootkernelbase(%rip), %rax
	cmpq	%rax, %rdi		/* s >= postbootkernelbase? */
	jae	str_valid
	pushq	%rbp			/* no: build a frame and panic */
	movq	%rsp, %rbp
	leaq	.str_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
#endif	/* DEBUG */
str_valid:
	movq	%rdi, %rax		/* %rax = s0 (start of string) */
	cmpb	$0, (%rdi)		/* empty string? */
	je	.null_found
	.align	4
.strlen_loop:
	incq	%rdi			/* s++ */
	cmpb	$0, (%rdi)
	jne	.strlen_loop		/* until *s == 0 */
.null_found:
	subq	%rax, %rdi		/* %rdi = s - s0 */
	movq	%rdi, %rax
	ret
	SET_SIZE(strlen)

#elif defined(__i386)

	/*
	 * size_t strlen(const char *s)
	 *
	 * In:	4(%esp) = s
	 * Out:	%eax = number of bytes before the first zero byte
	 *
	 * Bytes are checked one at a time until the pointer is 4-byte
	 * aligned; after that a whole word is tested per iteration.  For a
	 * word w, ((w & 0x7f7f7f7f) + 0x7f7f7f7f) | w has bit 7 set in
	 * every byte position exactly when no byte of w is zero.  When a
	 * word fails that test, the pointer is backed up and the byte loop
	 * locates the zero byte.
	 */
	ENTRY(strlen)
#ifdef DEBUG
	movl	postbootkernelbase, %eax
	cmpl	%eax, 4(%esp)		/* s >= postbootkernelbase? */
	jae	str_valid
	pushl	%ebp			/* no: build a frame and panic */
	movl	%esp, %ebp
	pushl	$.str_panic_msg
	call	panic
#endif /* DEBUG */

str_valid:
	movl	4(%esp), %eax		/* %eax = s */
	testl	$3, %eax		/* 4-byte aligned already? */
	jnz	.not_word_aligned	/* no: start with the byte loop */
	.align	4
.word_aligned:
	movl	$0x7f7f7f7f, %ecx
	movl	(%eax), %edx		/* %edx = w, the next four bytes */
	andl	%edx, %ecx		/* %ecx = w & 0x7f7f7f7f */
	addl	$4, %eax		/* advance past this word */
	addl	$0x7f7f7f7f, %ecx	/* carries into bit 7 if low bits set */
	orl	%edx, %ecx		/* ... or bit 7 was set to begin with */
	andl	$0x80808080, %ecx	/* keep only bit 7 of each byte */
	cmpl	$0x80808080, %ecx	/* all four set: no zero byte here */
	je	.word_aligned		/* so go on to the next word */
	subl	$4, %eax		/* undo the advance */
.not_word_aligned:
	cmpb	$0, (%eax)		/* is this the zero byte? */
	je	.null_found
	incl	%eax			/* next byte */
	testl	$3, %eax		/* reached a word boundary? */
	jnz	.not_word_aligned	/* not yet: keep going by bytes */
	jmp	.word_aligned		/* yes: switch to the word loop */
	.align	4
.null_found:
	subl	4(%esp), %eax		/* %eax = end - s */
	ret
	SET_SIZE(strlen)

#endif	/* __i386 */

#ifdef DEBUG
	.text
.str_panic_msg:
	.string "strlen: argument below kernelbase"
#endif /* DEBUG */

#endif	/* __lint */
1083
	/*
	 * Berkeley 4.3 introduced symbolically named interrupt levels
	 * as a way to deal with priority in a machine-independent fashion.
	 * Numbered priorities are machine specific, and should be
	 * discouraged where possible.
	 *
	 * Note, for the machine-specific priorities there are
	 * examples listed for devices that use a particular priority.
	 * It should not be construed that all devices of that
	 * type should be at that priority.  It is simply where
	 * the current devices fit into the priority scheme based
	 * upon time criticalness.
	 *
	 * The underlying assumption of these assignments is that
	 * IPL 10 is the highest level from which a device
	 * routine can call wakeup.  Devices that interrupt from higher
	 * levels are restricted in what they can do.  If they need
	 * kernel services they should schedule a routine at a lower
	 * level (via software interrupt) to do the required
	 * processing.
	 *
	 * Examples of this higher usage:
	 *	Level	Usage
	 *	14	Profiling clock (and PROM uart polling clock)
	 *	12	Serial ports
	 *
	 * The serial ports request lower level processing on level 6.
	 *
	 * Also, almost all splN routines (where N is a number or a
	 * mnemonic) will do a RAISE(), on the assumption that they are
	 * never used to lower our priority.
	 * The exceptions are:
	 *	spl8()		Because you can't be above 15 to begin with!
	 *	splzs()		Because this is used at boot time to lower our
	 *			priority, to allow the PROM to poll the uart.
	 *	spl0()		Used to lower priority to 0.
	 */
1121
#if defined(__lint)

int
spl0(void)
{
	return (0);
}

int
spl6(void)
{
	return (0);
}

int
spl7(void)
{
	return (0);
}

int
spl8(void)
{
	return (0);
}

int
splhigh(void)
{
	return (0);
}

int
splhi(void)
{
	return (0);
}

int
splzs(void)
{
	return (0);
}

/* ARGSUSED */
void
splx(int level)
{
}

#else	/* __lint */

/*
 * SETPRI(level) hands the literal level to do_splx; RAISE(level) hands it
 * to splr.  On amd64 the level goes in %edi and the helper is reached with
 * a jmp, so the helper returns straight to our caller.  On i386 the level
 * is pushed, the helper is called, and the argument is popped before ret.
 */
#if defined(__amd64)

#define	SETPRI(level) \
	movl	$/**/level, %edi;	/* %edi = new priority */	\
	jmp	do_splx			/* finish in do_splx */

#define	RAISE(level) \
	movl	$/**/level, %edi;	/* %edi = new priority */	\
	jmp	splr			/* finish in splr */

#elif defined(__i386)

#define	SETPRI(level) \
	pushl	$/**/level;	/* argument: new priority */		\
	call	do_splx;	/* common splx code */			\
	addl	$4, %esp;	/* pop the argument */			\
	ret

#define	RAISE(level) \
	pushl	$/**/level;	/* argument: new priority */		\
	call	splr;		/* common splr code */			\
	addl	$4, %esp;	/* pop the argument */			\
	ret

#endif	/* __i386 */

	/* spl8: set priority 15 (locks out all interrupts) */
	ENTRY(spl8)
	SETPRI(15)
	SET_SIZE(spl8)

	/* spl7: raise to 13, just below the level that profiling runs */
	ENTRY(spl7)
	RAISE(13)
	SET_SIZE(spl7)

	/*
	 * splzs: set priority 12 (sun specific - highest priority onboard
	 * serial i/o asy ports).  Uses SETPRI rather than RAISE because it
	 * is also used to lower the priority.
	 */
	ENTRY(splzs)
	SETPRI(12)
	SET_SIZE(splzs)

	/* splhi, splhigh, spl6, i_ddi_splhigh: all raise to DISP_LEVEL */
	ENTRY(splhi)
	ALTENTRY(splhigh)
	ALTENTRY(spl6)
	ALTENTRY(i_ddi_splhigh)

	RAISE(DISP_LEVEL)

	SET_SIZE(i_ddi_splhigh)
	SET_SIZE(spl6)
	SET_SIZE(splhigh)
	SET_SIZE(splhi)

	/* spl0: set priority 0 (allow all interrupts) */
	ENTRY(spl0)
	SETPRI(0)
	SET_SIZE(spl0)


	/* splx(level): arguments untouched, handled entirely by do_splx */
	ENTRY(splx)
	jmp	do_splx
	SET_SIZE(splx)

#endif	/* __lint */
1204
#if defined(__i386)

/*
 * Accessors for the %gs segment register (i386 only).
 */

#if defined(__lint)

/*ARGSUSED*/
uint16_t
getgs(void)
{
	return (0);
}

/*ARGSUSED*/
void
setgs(uint16_t sel)
{
}

#else	/* __lint */

	/*
	 * uint16_t getgs(void)
	 *
	 * Out:	%eax = %gs selector in the low 16 bits, upper bits zero.
	 */
	ENTRY(getgs)
	clr	%eax			/* zero the upper half first */
	movw	%gs, %ax
	ret
	SET_SIZE(getgs)

	/*
	 * void setgs(uint16_t sel)
	 *
	 * In:	4(%esp) = sel, loaded directly into %gs.
	 */
	ENTRY(setgs)
	movw	4(%esp), %gs
	ret
	SET_SIZE(setgs)

#endif	/* __lint */
#endif	/* __i386 */
1238
#if defined(__lint)

void
pc_reset(void)
{
}

void
efi_reset(void)
{
}

#else	/* __lint */

	/*
	 * wait_500ms: calls tenmicrosec 50000 times and returns.  %ebx is
	 * the loop counter and is preserved for the caller.
	 * NOTE(review): the 500ms figure assumes each tenmicrosec call
	 * delays 10 microseconds, as its name suggests.
	 */
	ENTRY(wait_500ms)
#if defined(__amd64)
	pushq	%rbx
#elif defined(__i386)
	push	%ebx
#endif
	movl	$50000, %ebx		/* iterations remaining */
1:
	call	tenmicrosec
	decl	%ebx
	jnz	1b
#if defined(__amd64)
	popq	%rbx
#elif defined(__i386)
	pop	%ebx
#endif
	ret
	SET_SIZE(wait_500ms)

/* Bits of pc_reset_methods; each one enables a reset attempt in pc_reset. */
#define	RESET_METHOD_KBC	1
#define	RESET_METHOD_PORT92	2
#define RESET_METHOD_PCI	4

	/* All three methods are enabled by default. */
	DGDEF3(pc_reset_methods, 4, 8)
	.long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI;

	/*
	 * void pc_reset(void)
	 *
	 * Tries, in order, each reset method enabled in pc_reset_methods:
	 * keyboard controller, port 0x92, PCI port 0xcf9.  Every attempt is
	 * followed by wait_500ms.  If none of them takes effect, execution
	 * falls through into efi_reset.
	 */
	ENTRY(pc_reset)

#if defined(__i386)
	testl	$RESET_METHOD_KBC, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_KBC, pc_reset_methods(%rip)
#endif
	jz	1f

	/*
	 * Try the classic keyboard controller-triggered reset:
	 * write 0xfe to port 0x64.
	 */
	movw	$0x64, %dx
	movb	$0xfe, %al
	outb	(%dx)

	/*
	 * Wait up to 500 milliseconds here for the keyboard controller
	 * to pull the reset line.  On some systems where the keyboard
	 * controller is slow to pull the reset line, the next reset method
	 * may be executed (which may be bad if those systems hang when the
	 * next reset method is used, e.g. Ferrari 3400 (doesn't like port
	 * 92), and Ferrari 4000 (doesn't like the cf9 reset method)).
	 */
	call	wait_500ms

1:
#if defined(__i386)
	testl	$RESET_METHOD_PORT92, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_PORT92, pc_reset_methods(%rip)
#endif
	jz	3f

	/*
	 * Try the port 0x92 fast reset.  Reading 0xff means the port is
	 * not there, so skip the writes.  Bit 0 is written as 0 first if it
	 * reads back as 1, and is then written as 1.
	 */
	movw	$0x92, %dx
	inb	(%dx)
	cmpb	$0xff, %al		/* port absent? */
	je	1f
	testb	$1, %al			/* bit 0 already clear? */
	jz	2f			/* yes: go straight to the reset */
	andb	$0xfe, %al		/* no: clear bit 0 ... */
	outb	(%dx)			/* ... and write it back first */
2:
	orb	$1, %al			/* set bit 0 */
	outb	(%dx)			/* and reset the system */
1:

	call	wait_500ms

3:
#if defined(__i386)
	testl	$RESET_METHOD_PCI, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_PCI, pc_reset_methods(%rip)
#endif
	jz	4f

	/*
	 * Try the PCI (soft) reset vector (should work on all modern
	 * systems, but has been shown to cause problems on 450NX systems,
	 * and some newer systems (e.g. ATI IXP400-equipped systems)).
	 * When resetting via this method, 2 writes are required.  The first
	 * targets bit 1 (0=hard reset without power cycle, 1=hard reset
	 * with power cycle).  The reset occurs on the second write, during
	 * bit 2's transition from 0->1.
	 */
	movw	$0xcf9, %dx
	movb	$0x2, %al		/* first write: 0x2 */
	outb	(%dx)
	movb	$0x6, %al		/* second write: bit 2 goes 0->1 */
	outb	(%dx)

	call	wait_500ms

4:
	/*
	 * port 0xcf9 failed also.  Last-ditch effort is to
	 * triple-fault the CPU.
	 * Also, use triple fault for EFI firmware.
	 *
	 * void efi_reset(void)
	 *
	 * Loads an IDT descriptor made of zeroes from the stack (limit 0,
	 * base 0) and raises interrupt 0.  lidt reads 10 of the 16 bytes
	 * pushed on amd64 and 6 of the 8 bytes pushed on i386; the rest
	 * are unused.
	 */
	ENTRY(efi_reset)
#if defined(__amd64)
	pushq	$0x0
	pushq	$0x0			/* zeroed IDT limit and base */
	lidt	(%rsp)
#elif defined(__i386)
	pushl	$0x0
	pushl	$0x0			/* zeroed IDT limit and base */
	lidt	(%esp)
#endif
	int	$0x0			/* trigger interrupt, triple-fault */

	cli
	hlt				/* wait forever */
	/*NOTREACHED*/
	SET_SIZE(efi_reset)
	SET_SIZE(pc_reset)

#endif	/* __lint */
1377
1378/*
1379 * C callable in and out routines
1380 */
1381
#if defined(__lint)

/* ARGSUSED */
void
outl(int port_address, uint32_t val)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void outl(int port_address, uint32_t val)
	 *
	 * In:	%edi = port_address (low 16 bits used), %esi = val
	 * Writes the 32-bit val to the I/O port.
	 */
	ENTRY(outl)
	movl	%esi, %eax		/* %eax = val */
	movw	%di, %dx		/* %dx = port */
	outl	(%dx)
	ret
	SET_SIZE(outl)

#elif defined(__i386)

	/*
	 * Stack offsets of the two arguments on entry.  The i386 versions
	 * of outw and outb refer to both symbols, inl to PORT only.
	 */
	.set	PORT, 4
	.set	VAL, 8

	/*
	 * void outl(int port_address, uint32_t val)
	 *
	 * Writes the 32-bit val to the I/O port.
	 */
	ENTRY(outl)
	movl	VAL(%esp), %eax		/* %eax = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	outl	(%dx)
	ret
	SET_SIZE(outl)

#endif	/* __i386 */
#endif	/* __lint */
1414
#if defined(__lint)

/* ARGSUSED */
void
outw(int port_address, uint16_t val)
{}

#else	/* __lint */

/*
 * void outw(int port_address, uint16_t val)
 *
 * Write the low 16 bits of val to the I/O port named by the low 16 bits
 * of port_address.  The instruction is spelled "D16 outl"; D16 is defined
 * outside this section and is presumably the operand-size override that
 * narrows the out to 16 bits (TODO confirm against its definition).
 */
#if defined(__amd64)

	ENTRY(outw)
	movw	%si, %ax		/* %ax = val (second argument) */
	movw	%di, %dx		/* %dx = port (first argument) */
	D16 outl (%dx)		/* XX64 why not outw? */
	ret
	SET_SIZE(outw)

#elif defined(__i386)

	/* PORT and VAL are the stack offsets set up ahead of outl() */
	ENTRY(outw)
	movw	VAL(%esp), %ax		/* %ax = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	D16 outl (%dx)
	ret
	SET_SIZE(outw)

#endif	/* __i386 */
#endif	/* __lint */
1444
#if defined(__lint)

/* ARGSUSED */
void
outb(int port_address, uint8_t val)
{}

#else	/* __lint */

/*
 * void outb(int port_address, uint8_t val)
 *
 * Write the byte val to the I/O port named by the low 16 bits of
 * port_address.
 */
#if defined(__amd64)

	ENTRY(outb)
	movb	%sil, %al		/* %al = val (second argument) */
	movw	%di, %dx		/* %dx = port (first argument) */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#elif defined(__i386)

	/* PORT and VAL are the stack offsets set up ahead of outl() */
	ENTRY(outb)
	movb	VAL(%esp), %al		/* %al = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#endif	/* __i386 */
#endif	/* __lint */
1474
#if defined(__lint)

/* ARGSUSED */
uint32_t
inl(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint32_t inl(int port_address)
 *
 * Read 32 bits from the I/O port named by the low 16 bits of
 * port_address and return them in %eax.
 */
#if defined(__amd64)

	ENTRY(inl)
	movw	%di, %dx		/* %dx = port (first argument) */
	xorl	%eax, %eax		/* start from a clean %rax */
	inl	(%dx)
	ret
	SET_SIZE(inl)

#elif defined(__i386)

	/* PORT is the stack offset set up ahead of outl() */
	ENTRY(inl)
	movw	PORT(%esp), %dx		/* %dx = port */
	inl	(%dx)
	ret
	SET_SIZE(inl)

#endif	/* __i386 */
#endif	/* __lint */
1503
#if defined(__lint)

/* ARGSUSED */
uint16_t
inw(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint16_t inw(int port_address)
 *
 * Read from the I/O port named by the low 16 bits of port_address.
 * %eax is zeroed first so that whatever the narrowed "D16 inl" does not
 * write stays zero in the returned register.  D16 is defined outside this
 * section; it presumably selects a 16-bit operand size (TODO confirm).
 */
#if defined(__amd64)

	ENTRY(inw)
	movw	%di, %dx		/* %dx = port (first argument) */
	xorl	%eax, %eax		/* clear the whole return register */
	D16 inl	(%dx)
	ret
	SET_SIZE(inw)

#elif defined(__i386)

	/* PORT is the stack offset set up ahead of outl() */
	ENTRY(inw)
	movw	PORT(%esp), %dx		/* %dx = port */
	xorl	%eax, %eax		/* clear the whole return register */
	D16 inl	(%dx)
	ret
	SET_SIZE(inw)

#endif	/* __i386 */
#endif	/* __lint */
1533
1534
#if defined(__lint)

/* ARGSUSED */
uint8_t
inb(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint8_t inb(int port_address)
 *
 * Read one byte from the I/O port named by the low 16 bits of
 * port_address.  %eax is zeroed beforehand because inb only writes %al.
 */
#if defined(__amd64)

	ENTRY(inb)
	movw	%di, %dx		/* %dx = port (first argument) */
	xorl	%eax, %eax		/* upper bytes of the result are 0 */
	inb	(%dx)
	ret
	SET_SIZE(inb)

#elif defined(__i386)

	/* PORT is the stack offset set up ahead of outl() */
	ENTRY(inb)
	movw	PORT(%esp), %dx		/* %dx = port */
	xorl	%eax, %eax		/* upper bytes of the result are 0 */
	inb	(%dx)
	ret
	SET_SIZE(inb)

#endif	/* __i386 */
#endif	/* __lint */
1564
1565
#if defined(__lint)

/* ARGSUSED */
void
repoutsw(int port, uint16_t *addr, int cnt)
{}

#else	/* __lint */

/*
 * void repoutsw(int port, uint16_t *addr, int cnt)
 *
 * Stream cnt items from memory at addr to an I/O port using a repeated
 * string output.  The instruction is written "D16 outsl"; D16 comes from
 * outside this section and presumably narrows each transfer to 16 bits,
 * matching the uint16_t prototype (TODO confirm).
 */
#if defined(__amd64)

	ENTRY(repoutsw)
	/* addr is already in %rsi, the source register of outs */
	movl	%edx, %ecx		/* %ecx = cnt, saved before %dx is reused */
	movw	%di, %dx		/* %dx = port */
	rep
	  D16 outsl
	ret
	SET_SIZE(repoutsw)

#elif defined(__i386)

	/*
	 * Stack as seen after the single register push below:
	 *      |  cnt  |  +16
	 *      | *addr |  +12
	 *      | port  |  +8
	 *      |  eip  |  +4
	 *      |  esi  |  <-- %esp
	 * Pushing anything else requires adjusting these offsets.
	 */
	.set	PORT, 8
	.set	ADDR, 12
	.set	COUNT, 16

	ENTRY(repoutsw)
	pushl	%esi			/* %esi is callee-saved */
	movl	COUNT(%esp), %ecx	/* repeat count */
	movl	ADDR(%esp), %esi	/* source buffer */
	movl	PORT(%esp), %edx	/* destination port */
	rep
	  D16 outsl
	popl	%esi
	ret
	SET_SIZE(repoutsw)

#endif	/* __i386 */
#endif	/* __lint */
1615
1616
#if defined(__lint)

/* ARGSUSED */
void
repinsw(int port_addr, uint16_t *addr, int cnt)
{}

#else	/* __lint */

/*
 * void repinsw(int port_addr, uint16_t *addr, int cnt)
 *
 * Stream cnt items from an I/O port into memory at addr using a repeated
 * string input.  The instruction is written "D16 insl"; D16 comes from
 * outside this section and presumably narrows each transfer to 16 bits,
 * matching the uint16_t prototype (TODO confirm).
 */
#if defined(__amd64)

	ENTRY(repinsw)
	movl	%edx, %ecx		/* %ecx = cnt, saved before %dx is reused */
	movw	%di, %dx		/* %dx = port */
	/*
	 * ins stores through %rdi, but addr arrives in %rsi (second
	 * argument).  Without this move the data would be written to the
	 * address formed by the port number.  repinsb() and repinsd() do
	 * the same.
	 */
	movq	%rsi, %rdi
	rep
	  D16 insl
	ret
	SET_SIZE(repinsw)

#elif defined(__i386)

	/*
	 * PORT, ADDR and COUNT are the stack offsets defined ahead of
	 * repoutsw(); they assume exactly one register has been pushed.
	 */
	ENTRY(repinsw)
	pushl	%edi			/* %edi is callee-saved */
	movl	PORT(%esp), %edx	/* source port */
	movl	ADDR(%esp), %edi	/* destination buffer */
	movl	COUNT(%esp), %ecx	/* repeat count */
	rep
	  D16 insl
	popl	%edi
	ret
	SET_SIZE(repinsw)

#endif	/* __i386 */
#endif	/* __lint */
1651
1652
#if defined(__lint)

/* ARGSUSED */
void
repinsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repinsb(int port, uint8_t *addr, int count)
 *
 * Read count bytes from an I/O port into the buffer at addr.
 */
#if defined(__amd64)

	ENTRY(repinsb)
	movl	%edx, %ecx		/* %ecx = count, saved before %dx is reused */
	movw	%di, %dx		/* %dx = port */
	movq	%rsi, %rdi		/* ins stores through %rdi */
	rep
	  insb
	ret
	SET_SIZE(repinsb)

#elif defined(__i386)

	/*
	 * Stack as seen after one register has been pushed (this routine
	 * saves %edi; the other users of these offsets save %esi or %edi):
	 *      |  cnt      |  +16
	 *      | *addr     |  +12
	 *      | port      |  +8
	 *      |  eip      |  +4
	 *      | saved reg |  <-- %esp
	 * Pushing anything else requires adjusting these offsets.
	 */
	.set	IO_PORT, 8
	.set	IO_ADDR, 12
	.set	IO_COUNT, 16

	ENTRY(repinsb)
	pushl	%edi			/* %edi is callee-saved */
	movl	IO_PORT(%esp), %edx	/* source port */
	movl	IO_COUNT(%esp), %ecx	/* byte count */
	movl	IO_ADDR(%esp), %edi	/* destination buffer */
	rep
	  insb
	popl	%edi
	ret
	SET_SIZE(repinsb)

#endif	/* __i386 */
#endif	/* __lint */
1703
1704
/*
 * void repinsd(int port, uint32_t *addr, int count)
 *
 * Read a stream of 32-bit words from an I/O port into the buffer at addr.
 * NOTE: count is a DWORD count, not a byte count.
 */
#if defined(__lint)

/* ARGSUSED */
void
repinsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(repinsd)
	movl	%edx, %ecx		/* %ecx = count, saved before %dx is reused */
	movw	%di, %dx		/* %dx = port */
	movq	%rsi, %rdi		/* ins stores through %rdi */
	rep
	  insl
	ret
	SET_SIZE(repinsd)

#elif defined(__i386)

	/*
	 * IO_PORT, IO_ADDR and IO_COUNT are the stack offsets defined ahead
	 * of repinsb(); they assume exactly one register has been pushed.
	 */
	ENTRY(repinsd)
	pushl	%edi			/* %edi is callee-saved */
	movl	IO_PORT(%esp), %edx	/* source port */
	movl	IO_COUNT(%esp), %ecx	/* dword count */
	movl	IO_ADDR(%esp), %edi	/* destination buffer */
	rep
	  insl
	popl	%edi
	ret
	SET_SIZE(repinsd)

#endif	/* __i386 */
#endif	/* __lint */
1744
/*
 * void repoutsb(int port, uint8_t *addr, int count)
 *
 * Write a stream of bytes from the buffer at addr to an I/O port.
 * NOTE: count is a byte count.
 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(repoutsb)
	/* addr is already in %rsi, the source register of outs */
	movl	%edx, %ecx		/* %ecx = count, saved before %dx is reused */
	movw	%di, %dx		/* %dx = port */
	rep
	  outsb
	ret
	SET_SIZE(repoutsb)

#elif defined(__i386)

	/*
	 * IO_PORT, IO_ADDR and IO_COUNT are the stack offsets defined ahead
	 * of repinsb(); they assume exactly one register has been pushed.
	 */
	ENTRY(repoutsb)
	pushl	%esi			/* %esi is callee-saved */
	movl	IO_PORT(%esp), %edx	/* destination port */
	movl	IO_COUNT(%esp), %ecx	/* byte count */
	movl	IO_ADDR(%esp), %esi	/* source buffer */
	rep
	  outsb
	popl	%esi
	ret
	SET_SIZE(repoutsb)

#endif	/* __i386 */
#endif	/* __lint */
1783
/*
 * void repoutsd(int port, uint32_t *addr, int count)
 *
 * Write a stream of 32-bit words from the buffer at addr to an I/O port.
 * NOTE: count is a DWORD count, not a byte count.
 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(repoutsd)
	/* addr is already in %rsi, the source register of outs */
	movl	%edx, %ecx		/* %ecx = count, saved before %dx is reused */
	movw	%di, %dx		/* %dx = port */
	rep
	  outsl
	ret
	SET_SIZE(repoutsd)

#elif defined(__i386)

	/*
	 * IO_PORT, IO_ADDR and IO_COUNT are the stack offsets defined ahead
	 * of repinsb(); they assume exactly one register has been pushed.
	 */
	ENTRY(repoutsd)
	pushl	%esi			/* %esi is callee-saved */
	movl	IO_PORT(%esp), %edx	/* destination port */
	movl	IO_COUNT(%esp), %ecx	/* dword count */
	movl	IO_ADDR(%esp), %esi	/* source buffer */
	rep
	  outsl
	popl	%esi
	ret
	SET_SIZE(repoutsd)

#endif	/* __i386 */
#endif	/* __lint */
1822
/*
 * Software-interrupt helpers, each callable from C:
 *
 *	void int3(void)		raise vector T_BPTFLT
 *	void int18(void)	raise vector T_MCE
 *	void int20(void)	raise vector T_DBGENTR, but only when
 *				RB_DEBUG is set in boothowto
 *	void int_cmci(void)	raise vector T_ENOEXTFLT
 *
 * The vector constants are defined outside this section.
 */

#if defined(__lint)

void
int3(void)
{}

void
int18(void)
{}

void
int20(void)
{}

void
int_cmci(void)
{}

#else	/* __lint */

	ENTRY(int3)
	int	$T_BPTFLT
	ret
	SET_SIZE(int3)

	ENTRY(int18)
	int	$T_MCE
	ret
	SET_SIZE(int18)

	ENTRY(int20)
	movl	boothowto, %eax
	andl	$RB_DEBUG, %eax		/* is the RB_DEBUG boot flag set? */
	jz	1f			/* no: return without trapping */

	int	$T_DBGENTR
1:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(int20)

	ENTRY(int_cmci)
	int	$T_ENOEXTFLT
	ret
	SET_SIZE(int_cmci)

#endif	/* __lint */
1877
#if defined(__lint)

/* ARGSUSED */
int
scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
{ return (0); }

#else	/* __lint */

/*
 * int scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
 *
 * Walk the size bytes starting at cp and stop at the first byte c for
 * which (table[c] & mask) is non-zero.  The return value is the number of
 * bytes from the stopping point to the end of the buffer; it is 0 when no
 * byte matched.  In C terms:
 *
 *	end = &cp[size];
 *	while (cp < end && (table[*cp] & mask) == 0)
 *		cp++;
 *	return (end - cp);
 */
#if defined(__amd64)

	ENTRY(scanc)
	/* in: %rdi = size, %rsi = cp, %rdx = table, %cl = mask */
	addq	%rsi, %rdi		/* %rdi = end = &cp[size] */
.scanloop:
	cmpq	%rdi, %rsi		/* reached the end? */
	jnb	.scandone
	movzbq	(%rsi), %r8		/* %r8 = *cp */
	incq	%rsi			/* advance optimistically */
	testb	%cl, (%r8, %rdx)	/* table[*cp] & mask */
	jz	.scanloop		/* no bits in common: keep going */
	decq	%rsi			/* matched: undo the advance */
.scandone:
	movl	%edi, %eax
	subl	%esi, %eax		/* return (end - cp) */
	ret
	SET_SIZE(scanc)

#elif defined(__i386)

	ENTRY(scanc)
	pushl	%edi
	pushl	%esi
	/* after two pushes: size at 12, cp at 16, table at 20, mask at 24 */
	movl	16(%esp), %esi		/* %esi = cp */
	movl	20(%esp), %edx		/* %edx = table */
	movb	24(%esp), %cl		/* %cl = mask */
	movl	%esi, %edi
	addl	12(%esp), %edi		/* %edi = end = &cp[size] */
.scanloop:
	cmpl	%edi, %esi		/* reached the end? */
	jnb	.scandone
	movzbl	(%esi),  %eax		/* %eax = *cp */
	incl	%esi			/* advance optimistically */
	movb	(%edx,  %eax), %al	/* %al = table[*cp] */
	testb	%al, %cl
	jz	.scanloop		/* no bits in common: keep going */
	dec	%esi			/* matched: undo the advance */
.scandone:
	movl	%edi, %eax
	subl	%esi, %eax		/* return (end - cp) */
	popl	%esi
	popl	%edi
	ret
	SET_SIZE(scanc)

#endif	/* __i386 */
#endif	/* __lint */
1938
/*
 * Out-of-line versions of routines that are normally inlined; a copy also
 * lives in i86.il, and these exist just in case the inline is not used.
 *
 * ulong_t intr_clear(void)
 * ulong_t clear_int_flag(void)
 *
 * Both names label the same code.  It captures the current flags register
 * as the return value and then disables interrupts through the CLI /
 * CLIRET macros (defined outside this section).
 */

#if defined(__lint)

ulong_t
intr_clear(void)
{ return (0); }

ulong_t
clear_int_flag(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfq
	popq	%rax			/* %rax = flags on entry */
#if defined(__xpv)
	leaq	xpv_panicking, %rdi
	movl	(%rdi), %edi
	cmpl	$0, %edi
	jne	2f			/* xpv_panicking set: take the plain CLI path */
	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
	/*
	 * The returned PS_IE bit is synthesized from the event mask:
	 * cleared when bit 0 of the mask is set, set otherwise.
	 */
	andq    $_BITNOT(PS_IE), %rax
	testb	$1, %dl
	jnz	1f
	orq	$PS_IE, %rax
1:
	ret
2:
#endif
	CLI(%rdi)
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

#elif defined(__i386)

	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfl
	popl	%eax			/* %eax = flags on entry */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	cmpl	$0, %edx
	jne	2f			/* xpv_panicking set: take the plain CLI path */
	CLIRET(%edx, %cl)	/* returns event mask in %cl */
	/*
	 * The returned PS_IE bit is synthesized from the event mask:
	 * cleared when bit 0 of the mask is set, set otherwise.
	 */
	andl    $_BITNOT(PS_IE), %eax
	testb	$1, %cl
	jnz	1f
	orl	$PS_IE, %eax
1:
	ret
2:
#endif
	CLI(%edx)
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

#endif	/* __i386 */
#endif	/* __lint */
2014
#if defined(__lint)

struct cpu *
curcpup(void)
{ return 0; }

#else	/* __lint */

/*
 * struct cpu *curcpup(void)
 *
 * Return the pointer stored at offset CPU_SELF from the %gs segment base.
 * CPU_SELF is defined outside this section.
 */
#if defined(__amd64)

	ENTRY(curcpup)
	movq	%gs:CPU_SELF, %rax	/* 64-bit pointer result */
	ret
	SET_SIZE(curcpup)

#elif defined(__i386)

	ENTRY(curcpup)
	movl	%gs:CPU_SELF, %eax	/* 32-bit pointer result */
	ret
	SET_SIZE(curcpup)

#endif	/* __i386 */
#endif	/* __lint */
2039
/*
 * htonll(), ntohll(), htonl(), ntohl(), htons(), ntohs()
 *
 * Each function reverses the byte order of its argument and returns the
 * result, converting between host byte order (little endian) and network
 * byte order (big endian).  Because the operation is its own inverse, the
 * hton* and ntoh* names of each width share one implementation.
 */

#if defined(__lint)

uint64_t
htonll(uint64_t i)
{ return (i); }

uint64_t
ntohll(uint64_t i)
{ return (i); }

uint32_t
htonl(uint32_t i)
{ return (i); }

uint32_t
ntohl(uint32_t i)
{ return (i); }

uint16_t
htons(uint16_t i)
{ return (i); }

uint16_t
ntohs(uint16_t i)
{ return (i); }

#else	/* __lint */

#if defined(__amd64)

	/* 64-bit swap: one bswapq on the argument */
	ENTRY(htonll)
	ALTENTRY(ntohll)
	movq	%rdi, %rax
	bswapq	%rax
	ret
	SET_SIZE(ntohll)
	SET_SIZE(htonll)

	/* 32-bit swap */
	/* XX64 there must be shorter sequences for this */
	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	%edi, %eax
	bswap	%eax
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

	/*
	 * 16-bit swap: swap all four bytes, then shift the two that
	 * matter back down into the low half.
	 */
	/* XX64 there must be better sequences for this */
	ENTRY(htons)
	ALTENTRY(ntohs)
	movl	%edi, %eax
	bswap	%eax
	shrl	$16, %eax
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#elif defined(__i386)

	/*
	 * 64-bit swap: the argument's low word is at 4(%esp) and its high
	 * word at 8(%esp).  Each half is byte-swapped and the halves trade
	 * places in the %edx:%eax result.
	 */
	ENTRY(htonll)
	ALTENTRY(ntohll)
	movl	8(%esp), %eax		/* old high word -> new low word */
	movl	4(%esp), %edx		/* old low word -> new high word */
	bswap	%eax
	bswap	%edx
	ret
	SET_SIZE(ntohll)
	SET_SIZE(htonll)

	/* 32-bit swap */
	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	4(%esp), %eax
	bswap	%eax
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

	/*
	 * 16-bit swap: swap all four bytes, then shift the two that
	 * matter back down into the low half.
	 */
	ENTRY(htons)
	ALTENTRY(ntohs)
	movl	4(%esp), %eax
	bswap	%eax
	shrl	$16, %eax
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#endif	/* __i386 */
#endif	/* __lint */
2134
2135
#if defined(__lint)

/* ARGSUSED */
void
intr_restore(ulong_t i)
{ return; }

/* ARGSUSED */
void
restore_int_flag(ulong_t i)
{ return; }

#else	/* __lint */

/*
 * void intr_restore(ulong_t i)
 * void restore_int_flag(ulong_t i)
 *
 * Both names label the same code.  i is a saved flags value; interrupts
 * are re-enabled only when PS_IE is set in it, and nothing is done
 * otherwise.
 */
#if defined(__amd64)

	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	testq	$PS_IE, %rdi
	jz	1f			/* PS_IE clear: leave interrupts as they are */
#if defined(__xpv)
	leaq	xpv_panicking, %rsi
	movl	(%rsi), %esi
	cmpl	$0, %esi
	jne	1f			/* xpv_panicking set: do nothing */
	/*
	 * We are -really- running unprivileged here, so an attempt to
	 * change the hardware IF bit would be ignored.  The virtual IF
	 * bit is the one that CLI and STI adjust.
	 */
	IE_TO_EVENT_MASK(%rsi, %rdi)
#else
	sti
#endif
1:
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

#elif defined(__i386)

	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	testl	$PS_IE, 4(%esp)
	jz	1f			/* PS_IE clear: leave interrupts as they are */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	cmpl	$0, %edx
	jne	1f			/* xpv_panicking set: do nothing */
	/*
	 * We are -really- running unprivileged here, so an attempt to
	 * change the hardware IF bit would be ignored.  The virtual IF
	 * bit is the one that CLI and STI adjust.
	 */
	IE_TO_EVENT_MASK(%edx, 4(%esp))
#else
	sti
#endif
1:
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

#endif	/* __i386 */
#endif	/* __lint */
2202
#if defined(__lint)

void
sti(void)
{}

void
cli(void)
{}

#else	/* __lint */

/*
 * void sti(void)
 * void cli(void)
 *
 * C-callable wrappers around the STI and CLI macros, which are defined
 * outside this section.  CLI takes a register argument; the
 * caller-saved return register is handed to it here.
 */
	ENTRY(sti)
	STI
	ret
	SET_SIZE(sti)

	ENTRY(cli)
#if defined(__amd64)
	CLI(%rax)
#elif defined(__i386)
	CLI(%eax)
#endif	/* __i386 */
	ret
	SET_SIZE(cli)

#endif	/* __lint */
2230
#if defined(__lint)

dtrace_icookie_t
dtrace_interrupt_disable(void)
{ return (0); }

#else   /* __lint */

/*
 * dtrace_icookie_t dtrace_interrupt_disable(void)
 *
 * Capture the current flags register as the returned cookie and disable
 * interrupts.  Under __xpv the cookie's PS_IE bit is derived from the
 * event mask that CLIRET hands back, and nothing is disabled at all when
 * xpv_panicking is non-zero.
 */
#if defined(__amd64)

	ENTRY(dtrace_interrupt_disable)
	pushfq
	popq	%rax			/* %rax = flags on entry */
#if defined(__xpv)
	leaq	xpv_panicking, %rdi
	movl	(%rdi), %edi
	cmpl	$0, %edi
	jne	.dtrace_interrupt_disable_done
	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
	/*
	 * The returned PS_IE bit is synthesized from the event mask:
	 * cleared when bit 0 of the mask is set, set otherwise.
	 */
	andq    $_BITNOT(PS_IE), %rax
	testb	$1, %dl
	jnz	.dtrace_interrupt_disable_done
	orq	$PS_IE, %rax
#else
	CLI(%rdx)
#endif
.dtrace_interrupt_disable_done:
	ret
	SET_SIZE(dtrace_interrupt_disable)

#elif defined(__i386)

	ENTRY(dtrace_interrupt_disable)
	pushfl
	popl	%eax			/* %eax = flags on entry */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	cmpl	$0, %edx
	jne	.dtrace_interrupt_disable_done
	CLIRET(%edx, %cl)	/* returns event mask in %cl */
	/*
	 * The returned PS_IE bit is synthesized from the event mask:
	 * cleared when bit 0 of the mask is set, set otherwise.
	 */
	andl    $_BITNOT(PS_IE), %eax
	testb	$1, %cl
	jnz	.dtrace_interrupt_disable_done
	orl	$PS_IE, %eax
#else
	CLI(%edx)
#endif
.dtrace_interrupt_disable_done:
	ret
	SET_SIZE(dtrace_interrupt_disable)

#endif	/* __i386 */
#endif	/* __lint */
2291
#if defined(__lint)

/*ARGSUSED*/
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
{}

#else	/* __lint */

/*
 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
 *
 * Load cookie into the flags register.  Under __xpv the cookie is
 * additionally passed to IE_TO_EVENT_MASK, unless xpv_panicking is
 * non-zero.
 */
#if defined(__amd64)

	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi
	popfq				/* flags = cookie */
#if defined(__xpv)
	leaq	xpv_panicking, %rdx
	movl	(%rdx), %edx
	cmpl	$0, %edx
	jne	.dtrace_interrupt_enable_done
	/*
	 * We are -really- running unprivileged here, so the popf above
	 * cannot change the hardware IF bit.  The virtual IF bit is the
	 * one that CLI and STI adjust.
	 */
	IE_TO_EVENT_MASK(%rdx, %rdi)
#endif
.dtrace_interrupt_enable_done:
	ret
	SET_SIZE(dtrace_interrupt_enable)

#elif defined(__i386)

	ENTRY(dtrace_interrupt_enable)
	movl	4(%esp), %eax		/* %eax = cookie */
	pushl	%eax
	popfl				/* flags = cookie */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	cmpl	$0, %edx
	jne	.dtrace_interrupt_enable_done
	/*
	 * We are -really- running unprivileged here, so the popf above
	 * cannot change the hardware IF bit.  The virtual IF bit is the
	 * one that CLI and STI adjust.
	 */
	IE_TO_EVENT_MASK(%edx, %eax)
#endif
.dtrace_interrupt_enable_done:
	ret
	SET_SIZE(dtrace_interrupt_enable)

#endif	/* __i386 */
#endif	/* __lint */
2346
2347
#if defined(__lint)

void
dtrace_membar_producer(void)
{}

void
dtrace_membar_consumer(void)
{}

#else	/* __lint */

/*
 * void dtrace_membar_producer(void)
 * void dtrace_membar_consumer(void)
 *
 * Both routines consist of a bare return; no barrier instruction is
 * issued.  (Presumably the x86 memory ordering rules make an explicit
 * barrier unnecessary for these two cases - confirm before relying on
 * that.)
 *
 * The guard above tests __lint, like every other lint guard in this file
 * and like this block's own #else/#endif annotations.
 */
	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_producer)

	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_consumer)

#endif	/* __lint */
2371
#if defined(__lint)

kthread_id_t
threadp(void)
{ return ((kthread_id_t)0); }

#else	/* __lint */

/*
 * kthread_id_t threadp(void)
 *
 * Return the value stored at offset CPU_THREAD from the %gs segment
 * base.  CPU_THREAD is defined outside this section.
 */
#if defined(__amd64)

	ENTRY(threadp)
	movq	%gs:CPU_THREAD, %rax	/* 64-bit pointer result */
	ret
	SET_SIZE(threadp)

#elif defined(__i386)

	ENTRY(threadp)
	movl	%gs:CPU_THREAD, %eax	/* 32-bit pointer result */
	ret
	SET_SIZE(threadp)

#endif	/* __i386 */
#endif	/* __lint */
2396
/*
 * unsigned int ip_ocsum(ushort_t *address, int halfword_count,
 *     unsigned int sum)
 *
 * Checksum routine for Internet Protocol headers.  Adds up
 * halfword_count 16-bit words starting at address, combines the total
 * with the partial checksum sum, and returns the result folded to 16
 * bits with end-around carry.
 *
 * Shape of the assembly implementations:
 *   - If address is not a multiple of 4, one leading halfword is added
 *     on its own so that the remaining loads are 4-byte aligned.
 *   - The main loop consumes 32 halfwords (64 bytes) per pass as sixteen
 *     32-bit add-with-carry operations, alternating between two
 *     accumulators.
 *   - A tail of fewer than 32 halfwords first adds any odd trailing
 *     halfword, then backs the data pointer up so the remaining dwords
 *     sit at the END of a 64-byte window, and jumps through a table into
 *     the middle of the unrolled sequence.  Entry N of the table sums
 *     exactly N dwords.
 */

#if defined(__lint)

/* ARGSUSED */
unsigned int
ip_ocsum(
	ushort_t *address,	/* ptr to 1st message buffer */
	int halfword_count,	/* length of data */
	unsigned int sum)	/* partial checksum */
{
	int		i;
	unsigned int	psum = 0;	/* partial sum */

	for (i = 0; i < halfword_count; i++, address++) {
		psum += *address;
	}

	/* fold the carries back into the low 16 bits */
	while ((psum >> 16) != 0) {
		psum = (psum & 0xffff) + (psum >> 16);
	}

	psum += sum;

	/* fold again after adding the caller's partial sum */
	while ((psum >> 16) != 0) {
		psum = (psum & 0xffff) + (psum >> 16);
	}

	return (psum);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(ip_ocsum)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	/* panic if address lies below postbootkernelbase */
	movq	postbootkernelbase(%rip), %rax
	cmpq	%rax, %rdi
	jnb	1f
	xorl	%eax, %eax
	movq	%rdi, %rsi		/* offending address for the message */
	leaq	.ip_ocsum_panic_msg(%rip), %rdi
	call	panic
	/*NOTREACHED*/
.ip_ocsum_panic_msg:
	.string	"ip_ocsum: address 0x%p below kernelbase\n"
1:
#endif
	/*
	 * Working registers from here on:
	 *   %ecx = halfwords still to add    %rsi = data pointer
	 *   %edx = accumulator 1 (starts as the caller's sum)
	 *   %eax = accumulator 2 (starts at 0)
	 */
	movl	%esi, %ecx
	movq	%rdi, %rsi
	xorl	%eax, %eax
	testl	%ecx, %ecx
	jz	.ip_ocsum_done		/* nothing to add */
	testq	$3, %rsi
	jnz	.ip_csum_notaligned	/* eat one halfword to align */
.ip_csum_aligned:	/* XX64 opportunities for 8-byte operations? */
.next_iter:
	/* XX64 opportunities for prefetch? */
	/* XX64 compute csum with 64 bit quantities? */
	subl	$32, %ecx		/* claim 32 halfwords for this pass */
	jl	.less_than_32		/* not that many left: do the tail */

	addl	0(%rsi), %edx		/* first add starts the carry chain */
.only60:
	adcl	4(%rsi), %eax
.only56:
	adcl	8(%rsi), %edx
.only52:
	adcl	12(%rsi), %eax
.only48:
	adcl	16(%rsi), %edx
.only44:
	adcl	20(%rsi), %eax
.only40:
	adcl	24(%rsi), %edx
.only36:
	adcl	28(%rsi), %eax
.only32:
	adcl	32(%rsi), %edx
.only28:
	adcl	36(%rsi), %eax
.only24:
	adcl	40(%rsi), %edx
.only20:
	adcl	44(%rsi), %eax
.only16:
	adcl	48(%rsi), %edx
.only12:
	adcl	52(%rsi), %eax
.only8:
	adcl	56(%rsi), %edx
.only4:
	adcl	60(%rsi), %eax	/* could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* could be adding -1 in eax with a carry */
	adcl	$0, %eax	/* ... so fold a second time */

	addq	$64, %rsi		/* step past the 64-byte window */
	testl	%ecx, %ecx
	jnz	.next_iter

.ip_ocsum_done:
	addl	%eax, %edx		/* merge the two accumulators */
	adcl	$0, %edx		/* end-around carry */
	movl	%edx, %eax	/* form a 16 bit checksum by */
	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
	addw	%dx, %ax
	adcw	$0, %ax
	andl	$0xffff, %eax
	leave
	ret

.ip_csum_notaligned:
	/* add the leading halfword by itself, then rejoin the main path */
	xorl	%edi, %edi
	movw	(%rsi), %di
	addl	%edi, %edx
	adcl	$0, %edx
	addq	$2, %rsi
	decl	%ecx
	jmp	.ip_csum_aligned

.less_than_32:
	addl	$32, %ecx		/* undo the subtraction: 0..31 remain */
	testl	$1, %ecx
	jz	.size_aligned
	/* odd count: add the final halfword separately */
	andl	$0xfe, %ecx		/* round the count down to even */
	movzwl	(%rsi, %rcx, 2), %edi	/* the halfword just past the even part */
	addl	%edi, %edx
	adcl	$0, %edx
.size_aligned:
	movl	%ecx, %edi
	shrl	$1, %ecx		/* %ecx = dwords remaining (table index) */
	shl	$1, %edi		/* %edi = bytes remaining */
	subq	$64, %rdi
	addq	%rdi, %rsi		/* data now ends at 64(%rsi) */
	leaq    .ip_ocsum_jmptbl(%rip), %rdi
	leaq	(%rdi, %rcx, 8), %rdi	/* address of the table entry */
	xorl	%ecx, %ecx		/* nothing left once the tail is done */
	clc				/* enter the adc chain with no carry */
	jmp 	*(%rdi)

	.align	8
.ip_ocsum_jmptbl:
	.quad	.only0, .only4, .only8, .only12, .only16, .only20
	.quad	.only24, .only28, .only32, .only36, .only40, .only44
	.quad	.only48, .only52, .only56, .only60
	SET_SIZE(ip_ocsum)

#elif defined(__i386)

	ENTRY(ip_ocsum)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/*
	 * Working registers from here on:
	 *   %ecx = halfwords still to add    %esi = data pointer
	 *   %edx = accumulator 1 (starts as the caller's sum)
	 *   %eax = accumulator 2 (starts at 0)
	 */
	movl	12(%ebp), %ecx	/* count of half words */
	movl	16(%ebp), %edx	/* partial checksum */
	movl	8(%ebp), %esi	/* address */
	xorl	%eax, %eax
	testl	%ecx, %ecx
	jz	.ip_ocsum_done		/* nothing to add */

	testl	$3, %esi
	jnz	.ip_csum_notaligned	/* eat one halfword to align */
.ip_csum_aligned:
.next_iter:
	subl	$32, %ecx		/* claim 32 halfwords for this pass */
	jl	.less_than_32		/* not that many left: do the tail */

	addl	0(%esi), %edx		/* first add starts the carry chain */
.only60:
	adcl	4(%esi), %eax
.only56:
	adcl	8(%esi), %edx
.only52:
	adcl	12(%esi), %eax
.only48:
	adcl	16(%esi), %edx
.only44:
	adcl	20(%esi), %eax
.only40:
	adcl	24(%esi), %edx
.only36:
	adcl	28(%esi), %eax
.only32:
	adcl	32(%esi), %edx
.only28:
	adcl	36(%esi), %eax
.only24:
	adcl	40(%esi), %edx
.only20:
	adcl	44(%esi), %eax
.only16:
	adcl	48(%esi), %edx
.only12:
	adcl	52(%esi), %eax
.only8:
	adcl	56(%esi), %edx
.only4:
	adcl	60(%esi), %eax	/* We could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* we could be adding -1 in eax with a carry */
	adcl	$0, %eax	/* ... so fold a second time */

	addl	$64, %esi		/* step past the 64-byte window */
	andl	%ecx, %ecx		/* sets ZF when no halfwords remain */
	jnz	.next_iter

.ip_ocsum_done:
	addl	%eax, %edx		/* merge the two accumulators */
	adcl	$0, %edx		/* end-around carry */
	movl	%edx, %eax	/* form a 16 bit checksum by */
	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
	addw	%dx, %ax
	adcw	$0, %ax
	andl	$0xffff, %eax
	popl	%edi		/* restore registers */
	popl	%esi
	popl	%ebx
	leave
	ret

.ip_csum_notaligned:
	/* add the leading halfword by itself, then rejoin the main path */
	xorl	%edi, %edi
	movw	(%esi), %di
	addl	%edi, %edx
	adcl	$0, %edx
	addl	$2, %esi
	decl	%ecx
	jmp	.ip_csum_aligned

.less_than_32:
	addl	$32, %ecx		/* undo the subtraction: 0..31 remain */
	testl	$1, %ecx
	jz	.size_aligned
	/* odd count: add the final halfword separately */
	andl	$0xfe, %ecx		/* round the count down to even */
	movzwl	(%esi, %ecx, 2), %edi	/* the halfword just past the even part */
	addl	%edi, %edx
	adcl	$0, %edx
.size_aligned:
	movl	%ecx, %edi
	shrl	$1, %ecx		/* %ecx = dwords remaining (table index) */
	shl	$1, %edi		/* %edi = bytes remaining */
	subl	$64, %edi
	addl	%edi, %esi		/* data now ends at 64(%esi) */
	movl	$.ip_ocsum_jmptbl, %edi
	lea	(%edi, %ecx, 4), %edi	/* address of the table entry */
	xorl	%ecx, %ecx		/* nothing left once the tail is done */
	clc				/* enter the adc chain with no carry */
	jmp 	*(%edi)
	SET_SIZE(ip_ocsum)

	/* on i386 the jump table is placed in the data section */
	.data
	.align	4

.ip_ocsum_jmptbl:
	.long	.only0, .only4, .only8, .only12, .only16, .only20
	.long	.only24, .only28, .only32, .only36, .only40, .only44
	.long	.only48, .only52, .only56, .only60


#endif	/* __i386 */
#endif	/* __lint */
2667
/*
 * unsigned long long mul32(uint_t a, uint_t b)
 *
 * Multiply two unsigned 32-bit numbers and return the full 64-bit
 * product; callable from C.  Provided for manipulating hrtime_t values.
 */
#if defined(__lint)

/* result = a * b; */

/* ARGSUSED */
unsigned long long
mul32(uint_t a, uint_t b)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	ENTRY(mul32)
	movl	%edi, %eax		/* %eax = a */
	xorl	%edx, %edx	/* XX64 joe, paranoia? */
	mull	%esi			/* %edx:%eax = a * b */
	shlq	$32, %rdx		/* move the high half into place */
	orq	%rdx, %rax		/* %rax = 64-bit product */
	ret
	SET_SIZE(mul32)

#elif defined(__i386)

	ENTRY(mul32)
	movl	4(%esp), %ecx		/* %ecx = a */
	movl	8(%esp), %eax		/* %eax = b */
	mull	%ecx			/* %edx:%eax = a * b, returned as is */
	ret
	SET_SIZE(mul32)

#endif	/* __i386 */
#endif	/* __lint */
2705
#if defined(notused)
#if defined(__lint)
/* ARGSUSED */
void
load_pte64(uint64_t *pte, uint64_t pte_value)
{}
#else	/* __lint */
/*
 * void load_pte64(uint64_t *pte, uint64_t pte_value)
 *
 * Store a 64-bit value through pte as two 32-bit writes, upper word
 * first and lower word second.  Arguments are read from the stack
 * (32-bit calling convention).  Compiled only when "notused" is defined.
 */
	.globl load_pte64
load_pte64:
	movl	4(%esp), %eax		/* %eax = pte */
	movl	12(%esp), %edx		/* %edx = upper 32 bits of pte_value */
	movl	8(%esp), %ecx		/* %ecx = lower 32 bits of pte_value */
	movl	%edx, 4(%eax)		/* upper word is written first */
	movl	%ecx, (%eax)		/* lower word is written last */
	ret
#endif	/* __lint */
#endif	/* notused */
2723
#if defined(__lint)

/*ARGSUSED*/
void
scan_memory(caddr_t addr, size_t size)
{}

#else	/* __lint */

/*
 * void scan_memory(caddr_t addr, size_t size)
 *
 * Read through the memory range [addr, addr + size) with a repeated
 * string load and discard the data.  size is rounded down to a whole
 * number of machine words (8 bytes on amd64, 4 on i386); a range shorter
 * than one word is not touched.
 */
#if defined(__amd64)

	ENTRY(scan_memory)
	shrq	$3, %rsi	/* bytes -> quadwords */
	jz	.scanm_done	/* less than one quadword: nothing to do */
	movq	%rsi, %rcx	/* repeat count for the rep prefix */
	movq	%rdi, %rsi	/* lodsq reads through %rsi */
	rep lodsq		/* scan the memory range */
.scanm_done:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(scan_memory)

#elif defined(__i386)

	ENTRY(scan_memory)
	pushl	%ecx
	pushl	%esi
	/* after two pushes: addr at 12(%esp), size at 16(%esp) */
	movl	16(%esp), %ecx	/* size becomes the repeat count */
	shrl	$2, %ecx	/* bytes -> 32-bit words */
	jz	.scanm_done	/* less than one word: nothing to do */
	movl	12(%esp), %esi	/* lodsl reads through %esi */
	.byte	0xf3		/* rep prefix.  lame assembler.  sigh. */
	lodsl
.scanm_done:
	popl	%esi
	popl	%ecx
	ret
	SET_SIZE(scan_memory)

#endif	/* __i386 */
#endif	/* __lint */
2765
2766
#if defined(__lint)

/*ARGSUSED */
int
lowbit(ulong_t i)
{ return (0); }

#else	/* __lint */

/*
 * int lowbit(ulong_t i)
 *
 * Return the 1-based position of the least significant set bit of i
 * (bit 0 yields 1), or 0 when i is zero.
 *
 * BSF sets ZF when its source operand is zero, and the Intel manuals
 * leave the destination register undefined in that case.  The zero case
 * is therefore decided from ZF instead of preloading the destination
 * with -1 and trusting BSF to leave it alone.
 */
#if defined(__amd64)

	ENTRY(lowbit)
	xorl	%eax, %eax		/* result for i == 0 */
	bsfq	%rdi, %rdx		/* %rdx = index of lowest set bit */
	jz	1f			/* ZF set: i was zero */
	leal	1(%rdx), %eax		/* result = index + 1 */
1:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(lowbit)

#elif defined(__i386)

	ENTRY(lowbit)
	xorl	%eax, %eax		/* result for i == 0 */
	bsfl	4(%esp), %edx		/* %edx = index of lowest set bit */
	jz	1f			/* ZF set: i was zero */
	leal	1(%edx), %eax		/* result = index + 1 */
1:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(lowbit)

#endif	/* __i386 */
#endif	/* __lint */
2796
#if defined(__lint)

/*ARGSUSED*/
int
highbit(ulong_t i)
{ return (0); }

#else	/* __lint */

/*
 * int highbit(ulong_t i)
 *
 * Return the 1-based position of the most significant set bit of i
 * (bit 0 yields 1), or 0 when i is zero.
 *
 * BSR sets ZF when its source operand is zero, and the Intel manuals
 * leave the destination register undefined in that case.  The zero case
 * is therefore decided from ZF instead of preloading the destination
 * with -1 and trusting BSR to leave it alone.
 */
#if defined(__amd64)

	ENTRY(highbit)
	xorl	%eax, %eax		/* result for i == 0 */
	bsrq	%rdi, %rdx		/* %rdx = index of highest set bit */
	jz	1f			/* ZF set: i was zero */
	leal	1(%rdx), %eax		/* result = index + 1 */
1:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(highbit)

#elif defined(__i386)

	ENTRY(highbit)
	xorl	%eax, %eax		/* result for i == 0 */
	bsrl	4(%esp), %edx		/* %edx = index of highest set bit */
	jz	1f			/* ZF set: i was zero */
	leal	1(%edx), %eax		/* result = index + 1 */
1:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(highbit)

#endif	/* __i386 */
#endif	/* __lint */
2826
#if defined(__lint)

/*ARGSUSED*/
uint64_t
rdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
wrmsr(uint_t r, const uint64_t val)
{}

/*ARGSUSED*/
uint64_t
xrdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
xwrmsr(uint_t r, const uint64_t val)
{}

void
invalidate_cache(void)
{}

#else  /* __lint */

/*
 * Model-specific register access, callable from C:
 *
 *	uint64_t rdmsr(uint_t r)		read MSR number r
 *	void wrmsr(uint_t r, uint64_t val)	write val to MSR number r
 *	uint64_t xrdmsr(uint_t r)		as rdmsr, with %edi preloaded
 *	void xwrmsr(uint_t r, uint64_t val)	as wrmsr, with %edi preloaded
 *	void invalidate_cache(void)		execute wbinvd
 *
 * The rdmsr/wrmsr instructions take the MSR number in %ecx and carry the
 * value in %edx:%eax.  The x-variants additionally load XMSR_ACCESS_VAL
 * into %edi before the instruction.
 */
#define	XMSR_ACCESS_VAL		$0x9c5a203a

#if defined(__amd64)

	ENTRY(rdmsr)
	movl	%edi, %ecx		/* MSR number */
	rdmsr
	shlq	$32, %rdx		/* combine %edx:%eax ... */
	orq	%rdx, %rax		/* ... into one 64-bit result */
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	%edi, %ecx		/* MSR number */
	movl	%esi, %eax		/* low 32 bits of val */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* high 32 bits of val */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	ENTRY(xrdmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* MSR number, copied before %edi is reused */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr
	shlq	$32, %rdx		/* combine %edx:%eax ... */
	orq	%rdx, %rax		/* ... into one 64-bit result */
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* MSR number, copied before %edi is reused */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	movl	%esi, %eax		/* low 32 bits of val */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* high 32 bits of val */
	wrmsr
	leave
	ret
	SET_SIZE(xwrmsr)

#elif defined(__i386)

	ENTRY(rdmsr)
	movl	4(%esp), %ecx		/* MSR number */
	rdmsr				/* %edx:%eax is already the return pair */
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	4(%esp), %ecx		/* MSR number */
	movl	12(%esp), %edx		/* high 32 bits of val */
	movl	8(%esp), %eax		/* low 32 bits of val */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	ENTRY(xrdmsr)
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%esp), %ecx		/* MSR number (offset includes saved %ebp) */
	pushl	%edi			/* %edi is callee-saved */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr
	popl	%edi
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%esp), %ecx		/* MSR number (offset includes saved %ebp) */
	movl	16(%esp), %edx		/* high 32 bits of val */
	movl	12(%esp), %eax		/* low 32 bits of val */
	pushl	%edi			/* %edi is callee-saved */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	wrmsr
	popl	%edi
	leave
	ret
	SET_SIZE(xwrmsr)

#endif	/* __i386 */

	ENTRY(invalidate_cache)
	wbinvd
	ret
	SET_SIZE(invalidate_cache)

#endif	/* __lint */
2951
#if defined(__lint)

/*
 * Lint-only stand-in; the executable getcregs() is the assembly routine
 * in the !__lint branch of this conditional.
 */
/*ARGSUSED*/
void
getcregs(struct cregs *crp)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void getcregs(struct cregs *crp)
	 *
	 * Snapshot control-register state into *crp, which arrives in %rdi.
	 * %rax is used as scratch; the native build also uses %rcx and %rdx
	 * (through rdmsr) and the __xpv build calls bzero().
	 */
	ENTRY_NP(getcregs)
#if defined(__xpv)
	/*
	 * Under the hypervisor only a few of the hardware control registers
	 * and descriptor tables can be read directly, so start from an
	 * all-zero structure.
	 *
	 * XXPV	Perhaps it would be helpful for the hypervisor to return
	 *	virtualized versions of these for post-mortem use.
	 *	(Need to reevaluate - perhaps it already does!)
	 */
	pushq	%rdi			/* keep crp across the call */
	movq	$CREGSZ, %rsi		/* bzero(crp, CREGSZ) */
	call	bzero
	popq	%rdi			/* %rdi = crp again */

	/*
	 * Record the registers that can still be read here.
	 */
	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)
	movq	%cr4, %rax
	movq	%rax, CREG_CR4(%rdi)

#else	/* __xpv */

/*
 * GETMSR(r, off, d): read MSR number `r` and store the 64-bit result
 * (%edx:%eax) at off(d).  Clobbers %eax, %ecx and %edx.
 */
#define	GETMSR(r, off, d)	\
	movl	$r, %ecx;	\
	rdmsr;			\
	movl	%eax, off(d);	\
	movl	%edx, off+4(d)

	/*
	 * Each descriptor-table/selector store below writes fewer bytes
	 * than are cleared immediately before it (10 of 16 for sgdt and
	 * sidt, 2 of 8 for sldt and str), so the bytes the instruction
	 * does not touch read back as zero.
	 */
	movq	$0, CREG_GDT+8(%rdi)
	sgdt	CREG_GDT(%rdi)		/* gdt: 10 bytes */
	movq	$0, CREG_IDT+8(%rdi)
	sidt	CREG_IDT(%rdi)		/* idt: 10 bytes */
	movq	$0, CREG_LDT(%rdi)
	sldt	CREG_LDT(%rdi)		/* ldt: 2 bytes */
	movq	$0, CREG_TASKR(%rdi)
	str	CREG_TASKR(%rdi)	/* task register: 2 bytes */

	/*
	 * Control registers, copied one at a time through %rax.
	 */
	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)
	movq	%cr4, %rax
	movq	%rax, CREG_CR4(%rdi)
	movq	%cr8, %rax
	movq	%rax, CREG_CR8(%rdi)

	/*
	 * Model-specific registers.
	 */
	GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
	GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
#endif	/* __xpv */
	ret
	SET_SIZE(getcregs)

#undef GETMSR

#elif defined(__i386)

	/*
	 * void getcregs(struct cregs *crp)
	 *
	 * i386 flavour: crp is the single stack argument and is kept in
	 * %edx while the structure is filled in; %eax is scratch.
	 */
	ENTRY_NP(getcregs)
#if defined(__xpv)
	/*
	 * Under the hypervisor only a few of the hardware control registers
	 * and descriptor tables can be read directly, so start from an
	 * all-zero structure.
	 *
	 * XXPV	Perhaps it would be helpful for the hypervisor to return
	 *	virtualized versions of these for post-mortem use.
	 *	(Need to reevaluate - perhaps it already does!)
	 */
	pushl	$CREGSZ			/* bzero(crp, CREGSZ) */
	pushl	8(%esp)			/* crp; one slot deeper after the push */
	call	bzero
	addl	$8, %esp
	movl	4(%esp), %edx		/* %edx = crp */

	/*
	 * Record the registers that can still be read here.
	 */
	movl	%cr0, %eax
	movl	%eax, CREG_CR0(%edx)
	movl	%cr2, %eax
	movl	%eax, CREG_CR2(%edx)
	movl	%cr3, %eax
	movl	%eax, CREG_CR3(%edx)
	movl	%cr4, %eax
	movl	%eax, CREG_CR4(%edx)

#else	/* __xpv */

	movl	4(%esp), %edx		/* %edx = crp */

	/*
	 * Clear the 16-bit word at offset 6 of the gdt and idt slots
	 * before the descriptor-table stores.
	 */
	movw	$0, CREG_GDT+6(%edx)
	movw	$0, CREG_IDT+6(%edx)
	sgdt	CREG_GDT(%edx)
	sidt	CREG_IDT(%edx)
	sldt	CREG_LDT(%edx)
	str	CREG_TASKR(%edx)

	movl	%cr0, %eax
	movl	%eax, CREG_CR0(%edx)
	movl	%cr2, %eax
	movl	%eax, CREG_CR2(%edx)
	movl	%cr3, %eax
	movl	%eax, CREG_CR3(%edx)

	/*
	 * %cr4 is only read when x86_feature has X86_LARGEPAGE set;
	 * otherwise the field is recorded as zero.
	 */
	xorl	%eax, %eax		/* must precede testl: xorl alters flags */
	testl	$X86_LARGEPAGE, x86_feature
	jz	1f
	movl	%cr4, %eax
1:
	movl	%eax, CREG_CR4(%edx)
#endif
	ret
	SET_SIZE(getcregs)

#endif	/* __i386 */
#endif	/* __lint */
3085
3086
3087/*
3088 * A panic trigger is a word which is updated atomically and can only be set
3089 * once.  We atomically store 0xDEFACEDD and load the old value.  If the
3090 * previous value was 0, we succeed and return 1; otherwise return 0.
3091 * This allows a partially corrupt trigger to still trigger correctly.  DTrace
3092 * has its own version of this function to allow it to panic correctly from
3093 * probe context.
3094 */
#if defined(__lint)

/*
 * Lint-only stand-ins for the assembly routines in the !__lint branch.
 */
/*ARGSUSED*/
int
panic_trigger(int *tp)
{
	return (0);
}

/*ARGSUSED*/
int
dtrace_panic_trigger(int *tp)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int panic_trigger(int *tp)
	 *
	 * Atomically swap 0xdefacedd into *tp (%rdi) and return 1 if the
	 * word previously held 0, otherwise 0.  Clobbers %edx.
	 */
	ENTRY_NP(panic_trigger)
	movl	$0xdefacedd, %edx
	lock
	  xchgl	%edx, (%rdi)		/* %edx = previous trigger value */
	xorl	%eax, %eax
	testl	%edx, %edx
	sete	%al			/* %eax = (previous value == 0) */
	ret
	SET_SIZE(panic_trigger)

	/*
	 * int dtrace_panic_trigger(int *tp)
	 *
	 * Same operation as panic_trigger(), kept as a separate routine
	 * for DTrace.
	 */
	ENTRY_NP(dtrace_panic_trigger)
	movl	$0xdefacedd, %edx
	lock
	  xchgl	%edx, (%rdi)		/* %edx = previous trigger value */
	xorl	%eax, %eax
	testl	%edx, %edx
	sete	%al			/* %eax = (previous value == 0) */
	ret
	SET_SIZE(dtrace_panic_trigger)

#elif defined(__i386)

	/*
	 * int panic_trigger(int *tp)
	 *
	 * Atomically swap 0xdefacedd into *tp (the stack argument) and
	 * return 1 if the word previously held 0, otherwise 0.
	 * Clobbers %ecx and %edx.
	 */
	ENTRY_NP(panic_trigger)
	movl	4(%esp), %ecx		/* %ecx = address of the trigger */
	movl	$0xdefacedd, %edx
	lock
	xchgl	%edx, (%ecx)		/* %edx = previous trigger value */
	xorl	%eax, %eax
	testl	%edx, %edx
	sete	%al			/* %eax = (previous value == 0) */
	ret
	SET_SIZE(panic_trigger)

	/*
	 * int dtrace_panic_trigger(int *tp)
	 *
	 * Same operation as panic_trigger(), kept as a separate routine
	 * for DTrace.
	 */
	ENTRY_NP(dtrace_panic_trigger)
	movl	4(%esp), %ecx		/* %ecx = address of the trigger */
	movl	$0xdefacedd, %edx
	lock
	xchgl	%edx, (%ecx)		/* %edx = previous trigger value */
	xorl	%eax, %eax
	testl	%edx, %edx
	sete	%al			/* %eax = (previous value == 0) */
	ret
	SET_SIZE(dtrace_panic_trigger)

#endif	/* __i386 */
#endif	/* __lint */
3167
3168/*
3169 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
3170 * into the panic code implemented in panicsys().  vpanic() is responsible
3171 * for passing through the format string and arguments, and constructing a
3172 * regs structure on the stack into which it saves the current register
3173 * values.  If we are not dying due to a fatal trap, these registers will
3174 * then be preserved in panicbuf as the current processor state.  Before
3175 * invoking panicsys(), vpanic() activates the first panic trigger (see
3176 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
3177 * DTrace takes a slightly different panic path if it must panic from probe
3178 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
3179 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
3180 * branches back into vpanic().
3181 */
3182#if defined(__lint)
3183
3184/*ARGSUSED*/
3185void
3186vpanic(const char *format, va_list alist)
3187{}
3188
3189/*ARGSUSED*/
3190void
3191dtrace_vpanic(const char *format, va_list alist)
3192{}
3193
3194#else	/* __lint */
3195
3196#if defined(__amd64)
3197
3198	ENTRY_NP(vpanic)			/* Initial stack layout: */
3199
3200	pushq	%rbp				/* | %rip | 	0x60	*/
3201	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3202	pushfq					/* | rfl  |	0x50	*/
3203	pushq	%r11				/* | %r11 |	0x48	*/
3204	pushq	%r10				/* | %r10 |	0x40	*/
3205	pushq	%rbx				/* | %rbx |	0x38	*/
3206	pushq	%rax				/* | %rax |	0x30	*/
3207	pushq	%r9				/* | %r9  |	0x28	*/
3208	pushq	%r8				/* | %r8  |	0x20	*/
3209	pushq	%rcx				/* | %rcx |	0x18	*/
3210	pushq	%rdx				/* | %rdx |	0x10	*/
3211	pushq	%rsi				/* | %rsi |	0x8 alist */
3212	pushq	%rdi				/* | %rdi |	0x0 format */
3213
3214	movq	%rsp, %rbx			/* %rbx = current %rsp */
3215
3216	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3217	call	panic_trigger			/* %eax = panic_trigger() */
3218
3219vpanic_common:
3220	/*
3221	 * The panic_trigger result is in %eax from the call above, and
3222	 * dtrace_panic places it in %eax before branching here.
3223	 * The rdmsr instructions that follow below will clobber %eax so
3224	 * we stash the panic_trigger result in %r11d.
3225	 */
3226	movl	%eax, %r11d
3227	cmpl	$0, %r11d
3228	je	0f
3229
3230	/*
3231	 * If panic_trigger() was successful, we are the first to initiate a
3232	 * panic: we now switch to the reserved panic_stack before continuing.
3233	 */
3234	leaq	panic_stack(%rip), %rsp
3235	addq	$PANICSTKSIZE, %rsp
32360:	subq	$REGSIZE, %rsp
3237	/*
3238	 * Now that we've got everything set up, store the register values as
3239	 * they were when we entered vpanic() to the designated location in
3240	 * the regs structure we allocated on the stack.
3241	 */
3242	movq	0x0(%rbx), %rcx
3243	movq	%rcx, REGOFF_RDI(%rsp)
3244	movq	0x8(%rbx), %rcx
3245	movq	%rcx, REGOFF_RSI(%rsp)
3246	movq	0x10(%rbx), %rcx
3247	movq	%rcx, REGOFF_RDX(%rsp)
3248	movq	0x18(%rbx), %rcx
3249	movq	%rcx, REGOFF_RCX(%rsp)
3250	movq	0x20(%rbx), %rcx
3251
3252	movq	%rcx, REGOFF_R8(%rsp)
3253	movq	0x28(%rbx), %rcx
3254	movq	%rcx, REGOFF_R9(%rsp)
3255	movq	0x30(%rbx), %rcx
3256	movq	%rcx, REGOFF_RAX(%rsp)
3257	movq	0x38(%rbx), %rcx
3258	movq	%rcx, REGOFF_RBX(%rsp)
3259	movq	0x58(%rbx), %rcx
3260
3261	movq	%rcx, REGOFF_RBP(%rsp)
3262	movq	0x40(%rbx), %rcx
3263	movq	%rcx, REGOFF_R10(%rsp)
3264	movq	0x48(%rbx), %rcx
3265	movq	%rcx, REGOFF_R11(%rsp)
3266	movq	%r12, REGOFF_R12(%rsp)
3267
3268	movq	%r13, REGOFF_R13(%rsp)
3269	movq	%r14, REGOFF_R14(%rsp)
3270	movq	%r15, REGOFF_R15(%rsp)
3271
3272	xorl	%ecx, %ecx
3273	movw	%ds, %cx
3274	movq	%rcx, REGOFF_DS(%rsp)
3275	movw	%es, %cx
3276	movq	%rcx, REGOFF_ES(%rsp)
3277	movw	%fs, %cx
3278	movq	%rcx, REGOFF_FS(%rsp)
3279	movw	%gs, %cx
3280	movq	%rcx, REGOFF_GS(%rsp)
3281
3282	movq	$0, REGOFF_TRAPNO(%rsp)
3283
3284	movq	$0, REGOFF_ERR(%rsp)
3285	leaq	vpanic(%rip), %rcx
3286	movq	%rcx, REGOFF_RIP(%rsp)
3287	movw	%cs, %cx
3288	movzwq	%cx, %rcx
3289	movq	%rcx, REGOFF_CS(%rsp)
3290	movq	0x50(%rbx), %rcx
3291	movq	%rcx, REGOFF_RFL(%rsp)
3292	movq	%rbx, %rcx
3293	addq	$0x60, %rcx
3294	movq	%rcx, REGOFF_RSP(%rsp)
3295	movw	%ss, %cx
3296	movzwq	%cx, %rcx
3297	movq	%rcx, REGOFF_SS(%rsp)
3298
3299	/*
3300	 * panicsys(format, alist, rp, on_panic_stack)
3301	 */
3302	movq	REGOFF_RDI(%rsp), %rdi		/* format */
3303	movq	REGOFF_RSI(%rsp), %rsi		/* alist */
3304	movq	%rsp, %rdx			/* struct regs */
3305	movl	%r11d, %ecx			/* on_panic_stack */
3306	call	panicsys
3307	addq	$REGSIZE, %rsp
3308	popq	%rdi
3309	popq	%rsi
3310	popq	%rdx
3311	popq	%rcx
3312	popq	%r8
3313	popq	%r9
3314	popq	%rax
3315	popq	%rbx
3316	popq	%r10
3317	popq	%r11
3318	popfq
3319	leave
3320	ret
3321	SET_SIZE(vpanic)
3322
3323	ENTRY_NP(dtrace_vpanic)			/* Initial stack layout: */
3324
3325	pushq	%rbp				/* | %rip | 	0x60	*/
3326	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3327	pushfq					/* | rfl  |	0x50	*/
3328	pushq	%r11				/* | %r11 |	0x48	*/
3329	pushq	%r10				/* | %r10 |	0x40	*/
3330	pushq	%rbx				/* | %rbx |	0x38	*/
3331	pushq	%rax				/* | %rax |	0x30	*/
3332	pushq	%r9				/* | %r9  |	0x28	*/
3333	pushq	%r8				/* | %r8  |	0x20	*/
3334	pushq	%rcx				/* | %rcx |	0x18	*/
3335	pushq	%rdx				/* | %rdx |	0x10	*/
3336	pushq	%rsi				/* | %rsi |	0x8 alist */
3337	pushq	%rdi				/* | %rdi |	0x0 format */
3338
3339	movq	%rsp, %rbx			/* %rbx = current %rsp */
3340
3341	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3342	call	dtrace_panic_trigger	/* %eax = dtrace_panic_trigger() */
3343	jmp	vpanic_common
3344
3345	SET_SIZE(dtrace_vpanic)
3346
3347#elif defined(__i386)
3348
3349	ENTRY_NP(vpanic)			/ Initial stack layout:
3350
3351	pushl	%ebp				/ | %eip | 20
3352	movl	%esp, %ebp			/ | %ebp | 16
3353	pushl	%eax				/ | %eax | 12
3354	pushl	%ebx				/ | %ebx |  8
3355	pushl	%ecx				/ | %ecx |  4
3356	pushl	%edx				/ | %edx |  0
3357
3358	movl	%esp, %ebx			/ %ebx = current stack pointer
3359
3360	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3361	pushl	%eax				/ push &panic_quiesce
3362	call	panic_trigger			/ %eax = panic_trigger()
3363	addl	$4, %esp			/ reset stack pointer
3364
3365vpanic_common:
3366	cmpl	$0, %eax			/ if (%eax == 0)
3367	je	0f				/   goto 0f;
3368
3369	/*
3370	 * If panic_trigger() was successful, we are the first to initiate a
3371	 * panic: we now switch to the reserved panic_stack before continuing.
3372	 */
3373	lea	panic_stack, %esp		/ %esp  = panic_stack
3374	addl	$PANICSTKSIZE, %esp		/ %esp += PANICSTKSIZE
3375
33760:	subl	$REGSIZE, %esp			/ allocate struct regs
3377
3378	/*
3379	 * Now that we've got everything set up, store the register values as
3380	 * they were when we entered vpanic() to the designated location in
3381	 * the regs structure we allocated on the stack.
3382	 */
3383#if !defined(__GNUC_AS__)
3384	movw	%gs, %edx
3385	movl	%edx, REGOFF_GS(%esp)
3386	movw	%fs, %edx
3387	movl	%edx, REGOFF_FS(%esp)
3388	movw	%es, %edx
3389	movl	%edx, REGOFF_ES(%esp)
3390	movw	%ds, %edx
3391	movl	%edx, REGOFF_DS(%esp)
3392#else	/* __GNUC_AS__ */
3393	mov	%gs, %edx
3394	mov	%edx, REGOFF_GS(%esp)
3395	mov	%fs, %edx
3396	mov	%edx, REGOFF_FS(%esp)
3397	mov	%es, %edx
3398	mov	%edx, REGOFF_ES(%esp)
3399	mov	%ds, %edx
3400	mov	%edx, REGOFF_DS(%esp)
3401#endif	/* __GNUC_AS__ */
3402	movl	%edi, REGOFF_EDI(%esp)
3403	movl	%esi, REGOFF_ESI(%esp)
3404	movl	16(%ebx), %ecx
3405	movl	%ecx, REGOFF_EBP(%esp)
3406	movl	%ebx, %ecx
3407	addl	$20, %ecx
3408	movl	%ecx, REGOFF_ESP(%esp)
3409	movl	8(%ebx), %ecx
3410	movl	%ecx, REGOFF_EBX(%esp)
3411	movl	0(%ebx), %ecx
3412	movl	%ecx, REGOFF_EDX(%esp)
3413	movl	4(%ebx), %ecx
3414	movl	%ecx, REGOFF_ECX(%esp)
3415	movl	12(%ebx), %ecx
3416	movl	%ecx, REGOFF_EAX(%esp)
3417	movl	$0, REGOFF_TRAPNO(%esp)
3418	movl	$0, REGOFF_ERR(%esp)
3419	lea	vpanic, %ecx
3420	movl	%ecx, REGOFF_EIP(%esp)
3421#if !defined(__GNUC_AS__)
3422	movw	%cs, %edx
3423#else	/* __GNUC_AS__ */
3424	mov	%cs, %edx
3425#endif	/* __GNUC_AS__ */
3426	movl	%edx, REGOFF_CS(%esp)
3427	pushfl
3428	popl	%ecx
3429#if defined(__xpv)
3430	/*
3431	 * Synthesize the PS_IE bit from the event mask bit
3432	 */
3433	CURTHREAD(%edx)
3434	KPREEMPT_DISABLE(%edx)
3435	EVENT_MASK_TO_IE(%edx, %ecx)
3436	CURTHREAD(%edx)
3437	KPREEMPT_ENABLE_NOKP(%edx)
3438#endif
3439	movl	%ecx, REGOFF_EFL(%esp)
3440	movl	$0, REGOFF_UESP(%esp)
3441#if !defined(__GNUC_AS__)
3442	movw	%ss, %edx
3443#else	/* __GNUC_AS__ */
3444	mov	%ss, %edx
3445#endif	/* __GNUC_AS__ */
3446	movl	%edx, REGOFF_SS(%esp)
3447
3448	movl	%esp, %ecx			/ %ecx = &regs
3449	pushl	%eax				/ push on_panic_stack
3450	pushl	%ecx				/ push &regs
3451	movl	12(%ebp), %ecx			/ %ecx = alist
3452	pushl	%ecx				/ push alist
3453	movl	8(%ebp), %ecx			/ %ecx = format
3454	pushl	%ecx				/ push format
3455	call	panicsys			/ panicsys();
3456	addl	$16, %esp			/ pop arguments
3457
3458	addl	$REGSIZE, %esp
3459	popl	%edx
3460	popl	%ecx
3461	popl	%ebx
3462	popl	%eax
3463	leave
3464	ret
3465	SET_SIZE(vpanic)
3466
3467	ENTRY_NP(dtrace_vpanic)			/ Initial stack layout:
3468
3469	pushl	%ebp				/ | %eip | 20
3470	movl	%esp, %ebp			/ | %ebp | 16
3471	pushl	%eax				/ | %eax | 12
3472	pushl	%ebx				/ | %ebx |  8
3473	pushl	%ecx				/ | %ecx |  4
3474	pushl	%edx				/ | %edx |  0
3475
3476	movl	%esp, %ebx			/ %ebx = current stack pointer
3477
3478	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3479	pushl	%eax				/ push &panic_quiesce
3480	call	dtrace_panic_trigger		/ %eax = dtrace_panic_trigger()
3481	addl	$4, %esp			/ reset stack pointer
3482	jmp	vpanic_common			/ jump back to common code
3483
3484	SET_SIZE(dtrace_vpanic)
3485
3486#endif	/* __i386 */
3487#endif	/* __lint */
3488
3489#if defined(__lint)
3490
3491void
3492hres_tick(void)
3493{}
3494
3495int64_t timedelta;
3496hrtime_t hres_last_tick;
3497volatile timestruc_t hrestime;
3498int64_t hrestime_adj;
3499volatile int hres_lock;
3500hrtime_t hrtime_base;
3501
3502#else	/* __lint */
3503
3504	DGDEF3(hrestime, _MUL(2, CLONGSIZE), 8)
3505	.NWORD	0, 0
3506
3507	DGDEF3(hrestime_adj, 8, 8)
3508	.long	0, 0
3509
3510	DGDEF3(hres_last_tick, 8, 8)
3511	.long	0, 0
3512
3513	DGDEF3(timedelta, 8, 8)
3514	.long	0, 0
3515
3516	DGDEF3(hres_lock, 4, 8)
3517	.long	0
3518
3519	/*
3520	 * initialized to a non zero value to make pc_gethrtime()
3521	 * work correctly even before clock is initialized
3522	 */
3523	DGDEF3(hrtime_base, 8, 8)
3524	.long	_MUL(NSEC_PER_CLOCK_TICK, 6), 0
3525
3526	DGDEF3(adj_shift, 4, 4)
3527	.long	ADJ_SHIFT
3528
3529#if defined(__amd64)
3530
3531	ENTRY_NP(hres_tick)
3532	pushq	%rbp
3533	movq	%rsp, %rbp
3534
3535	/*
3536	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3537	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3538	 * At worst, performing this now instead of under CLOCK_LOCK may
3539	 * introduce some jitter in pc_gethrestime().
3540	 */
3541	call	*gethrtimef(%rip)
3542	movq	%rax, %r8
3543
3544	leaq	hres_lock(%rip), %rax
3545	movb	$-1, %dl
3546.CL1:
3547	xchgb	%dl, (%rax)
3548	testb	%dl, %dl
3549	jz	.CL3			/* got it */
3550.CL2:
3551	cmpb	$0, (%rax)		/* possible to get lock? */
3552	pause
3553	jne	.CL2
3554	jmp	.CL1			/* yes, try again */
3555.CL3:
3556	/*
3557	 * compute the interval since last time hres_tick was called
3558	 * and adjust hrtime_base and hrestime accordingly
3559	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3560	 * a timestruc_t (sec, nsec)
3561	 */
3562	leaq	hres_last_tick(%rip), %rax
3563	movq	%r8, %r11
3564	subq	(%rax), %r8
3565	addq	%r8, hrtime_base(%rip)	/* add interval to hrtime_base */
3566	addq	%r8, hrestime+8(%rip)	/* add interval to hrestime.tv_nsec */
3567	/*
3568	 * Now that we have CLOCK_LOCK, we can update hres_last_tick
3569	 */
3570	movq	%r11, (%rax)
3571
3572	call	__adj_hrestime
3573
3574	/*
3575	 * release the hres_lock
3576	 */
3577	incl	hres_lock(%rip)
3578	leave
3579	ret
3580	SET_SIZE(hres_tick)
3581
3582#elif defined(__i386)
3583
3584	ENTRY_NP(hres_tick)
3585	pushl	%ebp
3586	movl	%esp, %ebp
3587	pushl	%esi
3588	pushl	%ebx
3589
3590	/*
3591	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3592	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3593	 * At worst, performing this now instead of under CLOCK_LOCK may
3594	 * introduce some jitter in pc_gethrestime().
3595	 */
3596	call	*gethrtimef
3597	movl	%eax, %ebx
3598	movl	%edx, %esi
3599
3600	movl	$hres_lock, %eax
3601	movl	$-1, %edx
3602.CL1:
3603	xchgb	%dl, (%eax)
3604	testb	%dl, %dl
3605	jz	.CL3			/ got it
3606.CL2:
3607	cmpb	$0, (%eax)		/ possible to get lock?
3608	pause
3609	jne	.CL2
3610	jmp	.CL1			/ yes, try again
3611.CL3:
3612	/*
3613	 * compute the interval since last time hres_tick was called
3614	 * and adjust hrtime_base and hrestime accordingly
3615	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3616	 * timestruc_t (sec, nsec)
3617	 */
3618
3619	lea	hres_last_tick, %eax
3620
3621	movl	%ebx, %edx
3622	movl	%esi, %ecx
3623
3624	subl 	(%eax), %edx
3625	sbbl 	4(%eax), %ecx
3626
3627	addl	%edx, hrtime_base	/ add interval to hrtime_base
3628	adcl	%ecx, hrtime_base+4
3629
3630	addl 	%edx, hrestime+4	/ add interval to hrestime.tv_nsec
3631
3632	/
3633	/ Now that we have CLOCK_LOCK, we can update hres_last_tick.
3634	/
3635	movl	%ebx, (%eax)
3636	movl	%esi,  4(%eax)
3637
3638	/ get hrestime at this moment. used as base for pc_gethrestime
3639	/
3640	/ Apply adjustment, if any
3641	/
3642	/ #define HRES_ADJ	(NSEC_PER_CLOCK_TICK >> ADJ_SHIFT)
3643	/ (max_hres_adj)
3644	/
3645	/ void
3646	/ adj_hrestime()
3647	/ {
3648	/	long long adj;
3649	/
3650	/	if (hrestime_adj == 0)
3651	/		adj = 0;
3652	/	else if (hrestime_adj > 0) {
3653	/		if (hrestime_adj < HRES_ADJ)
3654	/			adj = hrestime_adj;
3655	/		else
3656	/			adj = HRES_ADJ;
3657	/	}
3658	/	else {
3659	/		if (hrestime_adj < -(HRES_ADJ))
3660	/			adj = -(HRES_ADJ);
3661	/		else
3662	/			adj = hrestime_adj;
3663	/	}
3664	/
3665	/	timedelta -= adj;
3666	/	hrestime_adj = timedelta;
3667	/	hrestime.tv_nsec += adj;
3668	/
3669	/	while (hrestime.tv_nsec >= NANOSEC) {
3670	/		one_sec++;
3671	/		hrestime.tv_sec++;
3672	/		hrestime.tv_nsec -= NANOSEC;
3673	/	}
3674	/ }
3675__adj_hrestime:
3676	movl	hrestime_adj, %esi	/ if (hrestime_adj == 0)
3677	movl	hrestime_adj+4, %edx
3678	andl	%esi, %esi
3679	jne	.CL4			/ no
3680	andl	%edx, %edx
3681	jne	.CL4			/ no
3682	subl	%ecx, %ecx		/ yes, adj = 0;
3683	subl	%edx, %edx
3684	jmp	.CL5
3685.CL4:
3686	subl	%ecx, %ecx
3687	subl	%eax, %eax
3688	subl	%esi, %ecx
3689	sbbl	%edx, %eax
3690	andl	%eax, %eax		/ if (hrestime_adj > 0)
3691	jge	.CL6
3692
3693	/ In the following comments, HRES_ADJ is used, while in the code
3694	/ max_hres_adj is used.
3695	/
3696	/ The test for "hrestime_adj < HRES_ADJ" is complicated because
3697	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3698	/ on the logical equivalence of:
3699	/
3700	/	!(hrestime_adj < HRES_ADJ)
3701	/
3702	/ and the two step sequence:
3703	/
3704	/	(HRES_ADJ - lsw(hrestime_adj)) generates a Borrow/Carry
3705	/
3706	/ which computes whether or not the least significant 32-bits
3707	/ of hrestime_adj is greater than HRES_ADJ, followed by:
3708	/
3709	/	Previous Borrow/Carry + -1 + msw(hrestime_adj) generates a Carry
3710	/
3711	/ which generates a carry whenever step 1 is true or the most
3712	/ significant long of the longlong hrestime_adj is non-zero.
3713
3714	movl	max_hres_adj, %ecx	/ hrestime_adj is positive
3715	subl	%esi, %ecx
3716	movl	%edx, %eax
3717	adcl	$-1, %eax
3718	jnc	.CL7
3719	movl	max_hres_adj, %ecx	/ adj = HRES_ADJ;
3720	subl	%edx, %edx
3721	jmp	.CL5
3722
3723	/ The following computation is similar to the one above.
3724	/
3725	/ The test for "hrestime_adj < -(HRES_ADJ)" is complicated because
3726	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3727	/ on the logical equivalence of:
3728	/
3729	/	(hrestime_adj > -HRES_ADJ)
3730	/
3731	/ and the two step sequence:
3732	/
3733	/	(HRES_ADJ + lsw(hrestime_adj)) generates a Carry
3734	/
3735	/ which means the least significant 32-bits of hrestime_adj is
3736	/ greater than -HRES_ADJ, followed by:
3737	/
3738	/	Previous Carry + 0 + msw(hrestime_adj) generates a Carry
3739	/
3740	/ which generates a carry only when step 1 is true and the most
3741	/ significant long of the longlong hrestime_adj is -1.
3742
3743.CL6:					/ hrestime_adj is negative
3744	movl	%esi, %ecx
3745	addl	max_hres_adj, %ecx
3746	movl	%edx, %eax
3747	adcl	$0, %eax
3748	jc	.CL7
3749	xor	%ecx, %ecx
3750	subl	max_hres_adj, %ecx	/ adj = -(HRES_ADJ);
3751	movl	$-1, %edx
3752	jmp	.CL5
3753.CL7:
3754	movl	%esi, %ecx		/ adj = hrestime_adj;
3755.CL5:
3756	movl	timedelta, %esi
3757	subl	%ecx, %esi
3758	movl	timedelta+4, %eax
3759	sbbl	%edx, %eax
3760	movl	%esi, timedelta
3761	movl	%eax, timedelta+4	/ timedelta -= adj;
3762	movl	%esi, hrestime_adj
3763	movl	%eax, hrestime_adj+4	/ hrestime_adj = timedelta;
3764	addl	hrestime+4, %ecx
3765
3766	movl	%ecx, %eax		/ eax = tv_nsec
37671:
3768	cmpl	$NANOSEC, %eax		/ if ((unsigned long)tv_nsec >= NANOSEC)
3769	jb	.CL8			/ no
3770	incl	one_sec			/ yes,  one_sec++;
3771	incl	hrestime		/ hrestime.tv_sec++;
3772	addl	$-NANOSEC, %eax		/ tv_nsec -= NANOSEC
3773	jmp	1b			/ check for more seconds
3774
3775.CL8:
3776	movl	%eax, hrestime+4	/ store final into hrestime.tv_nsec
3777	incl	hres_lock		/ release the hres_lock
3778
3779	popl	%ebx
3780	popl	%esi
3781	leave
3782	ret
3783	SET_SIZE(hres_tick)
3784
3785#endif	/* __i386 */
3786#endif	/* __lint */
3787
3788/*
3789 * void prefetch_smap_w(void *)
3790 *
3791 * Prefetch ahead within a linear list of smap structures.
3792 * Not implemented for ia32.  Stub for compatibility.
3793 */
3794
#if defined(__lint)

/*
 * Lint-only stand-in for the assembly stub in the !__lint branch.
 */
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{
}

#else	/* __lint */

	/*
	 * Stub: returns immediately without touching its argument.
	 * The return is written as "rep; ret" so that it is a 2-byte
	 * instruction when used as a branch target (AMD Software
	 * Optimization Guide - Section 6.2).
	 */
	ENTRY(prefetch_smap_w)
	rep;	ret
	SET_SIZE(prefetch_smap_w)

#endif	/* __lint */
3809
3810/*
3811 * prefetch_page_r(page_t *)
3812 * issue prefetch instructions for a page_t
3813 */
#if defined(__lint)

/*
 * Lint-only stand-in for the assembly stub in the !__lint branch.
 */
/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{
}

#else	/* __lint */

	/*
	 * As implemented here this returns immediately without touching
	 * its argument.  The return is written as "rep; ret" so that it
	 * is a 2-byte instruction when used as a branch target (AMD
	 * Software Optimization Guide - Section 6.2).
	 */
	ENTRY(prefetch_page_r)
	rep;	ret
	SET_SIZE(prefetch_page_r)

#endif	/* __lint */
3829
#if defined(__lint)

/*
 * Lint-only stand-in for the assembly routines in the !__lint branch.
 */
/*ARGSUSED*/
int
bcmp(const void *s1, const void *s2, size_t count)
{
	return (0);
}

#else   /* __lint */

#if defined(__amd64)

	/*
	 * int bcmp(const void *s1, const void *s2, size_t count)
	 *
	 * Returns 0 when memcmp(s1, s2, count) returns 0 and 1 otherwise.
	 * The arguments arrive in %rdi, %rsi and %rdx and are passed to
	 * memcmp() untouched.  DEBUG kernels first panic if either
	 * pointer is below postbootkernelbase.
	 */
	ENTRY(bcmp)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	movq	postbootkernelbase(%rip), %r11
	cmpq	%r11, %rdi
	jb	0f			/* s1 < postbootkernelbase */
	cmpq	%r11, %rsi
	jae	1f			/* both pointers are acceptable */
0:
	leaq	.bcmp_panic_msg(%rip), %rdi
	xorl	%eax, %eax		/* panic() is variadic */
	call	panic
1:
#endif	/* DEBUG */
	call	memcmp
	testl	%eax, %eax
	setne	%al			/* collapse any non-zero result to 1 */
	movzbl	%al, %eax
	leave
	ret
	SET_SIZE(bcmp)

#elif defined(__i386)

/* Argument offsets from %ebp once the frame is set up. */
#define	ARG_S1		8
#define	ARG_S2		12
#define	ARG_LENGTH	16

	/*
	 * int bcmp(const void *s1, const void *s2, size_t count)
	 *
	 * Returns 0 if the two buffers hold the same bytes (or are the
	 * same buffer) and 1 otherwise.  Compares 4 bytes at a time
	 * while at least 4 remain, then byte by byte.  DEBUG kernels
	 * first panic if either pointer is below postbootkernelbase.
	 *
	 * Register use: %eax = s1 cursor, %ecx = s2 cursor,
	 * %edi = bytes remaining, %edx = scratch.
	 */
	ENTRY(bcmp)
	pushl	%ebp
	movl	%esp, %ebp
#ifdef DEBUG
	movl    postbootkernelbase, %eax
	cmpl    %eax, ARG_S1(%ebp)
	jb	0f			/* s1 < postbootkernelbase */
	cmpl    %eax, ARG_S2(%ebp)
	jae	1f			/* both pointers are acceptable */
0:
	pushl   $.bcmp_panic_msg
	call    panic
1:
#endif	/* DEBUG */

	pushl	%edi			/* preserved for the caller */
	movl	ARG_S1(%ebp), %eax
	movl	ARG_S2(%ebp), %ecx
	cmpl	%eax, %ecx		/* same address: trivially equal */
	je	.equal
	movl	ARG_LENGTH(%ebp), %edi
	cmpl	$4, %edi
	jb	.byte_check		/* fewer than 4 bytes in total */
	.align	4
.word_loop:
	movl	(%ecx), %edx		/* next 4 bytes of s2 */
	subl	$4, %edi		/* count them as consumed */
	cmpl	(%eax), %edx		/* against the next 4 bytes of s1 */
	jne	.word_not_equal
	addl	$4, %ecx
	addl	$4, %eax
	cmpl	$4, %edi
	jae	.word_loop		/* at least one more full word */
.byte_check:
	testl	%edi, %edi
	jz	.equal			/* nothing left to compare */
	jmp	.byte_loop
.word_not_equal:
	addl	$4, %edi		/* undo the early decrement */
	.align	4
.byte_loop:
	movb	(%ecx),	%dl
	cmpb	%dl, (%eax)
	jne	.not_equal
	incl	%ecx
	incl	%eax
	decl	%edi
	jnz	.byte_loop
.equal:
	xorl	%eax, %eax		/* return 0 */
	popl	%edi
	leave
	ret
	.align	4
.not_equal:
	movl	$1, %eax		/* return 1 */
	popl	%edi
	leave
	ret
	SET_SIZE(bcmp)

#endif	/* __i386 */

#ifdef DEBUG
	/* Message passed to panic() by the DEBUG pointer checks above. */
	.text
.bcmp_panic_msg:
	.string "bcmp: arguments below kernelbase"
#endif	/* DEBUG */

#endif	/* __lint */
3938
#if defined(__lint)

/*
 * C rendering of the assembly below: index of the highest bit set in
 * the 16-bit mask, found by scanning down from the top bit.
 */
uint_t
bsrw_insn(uint16_t mask)
{
	uint_t bit;

	for (bit = sizeof (mask) * NBBY - 1; (mask & (1 << bit)) == 0; bit--)
		continue;
	return (bit);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint_t bsrw_insn(uint16_t mask)
	 *
	 * %eax is cleared first because bsrw only writes %ax; the upper
	 * half of the return value must already be zero.
	 */
	ENTRY_NP(bsrw_insn)
	xorl	%eax, %eax
	bsrw	%di, %ax		/* %ax = index of highest set bit */
	ret
	SET_SIZE(bsrw_insn)

#elif defined(__i386)

	/*
	 * uint_t bsrw_insn(uint16_t mask)
	 *
	 * Same as the amd64 version, scanning the 16-bit stack argument
	 * directly.
	 */
	ENTRY_NP(bsrw_insn)
	xorl	%eax, %eax
	bsrw	4(%esp), %ax		/* %ax = index of highest set bit */
	ret
	SET_SIZE(bsrw_insn)

#endif	/* __i386 */
#endif	/* __lint */
3972
#if defined(__lint)

/*
 * Lint-only stand-in: clears bit `pil` in *pending.
 */
uint_t
atomic_btr32(uint32_t *pending, uint_t pil)
{
	*pending &= ~(1 << pil);
	return (*pending);
}

#else	/* __lint */

#if defined(__i386)

	/*
	 * uint_t atomic_btr32(uint32_t *pending, uint_t pil)
	 *
	 * Atomically clear bit `pil` at *pending with a locked btrl.
	 * The value returned by this routine is the previous state of
	 * that bit (btrl leaves it in the carry flag): 1 if it was set,
	 * 0 if it was clear.
	 */
	ENTRY_NP(atomic_btr32)
	movl	4(%esp), %edx		/* %edx = pending */
	movl	8(%esp), %ecx		/* %ecx = pil */
	xorl	%eax, %eax
	lock
	btrl	%ecx, (%edx)
	setb	%al			/* %eax = carry = old bit */
	ret
	SET_SIZE(atomic_btr32)

#endif	/* __i386 */
#endif	/* __lint */
3997
#if defined(__lint)

/*
 * Lint-only stand-in for the assembly routines in the !__lint branch.
 */
/*ARGSUSED*/
void
switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t),
    uint_t arg1, uint_t arg2)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t),
	 *     uint_t arg1, uint_t arg2)
	 *
	 * Call func(arg1, arg2) with the stack pointer set to newsp.
	 * The caller's stack pointer is kept in %rbp, so "leave" puts
	 * us back on the original stack when func returns.
	 */
	ENTRY_NP(switch_sp_and_call)
	pushq	%rbp
	movq	%rsp, %rbp		/* remember the original stack */
	movq	%rsi, %r11		/* %r11 = func */
	movq	%rdi, %rsp		/* run on newsp from here on */
	movq	%rdx, %rdi		/* first argument for func */
	movq	%rcx, %rsi		/* second argument for func */
	call	*%r11
	leave				/* back to the original stack */
	ret
	SET_SIZE(switch_sp_and_call)

#elif defined(__i386)

	/*
	 * void switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t),
	 *     uint_t arg1, uint_t arg2)
	 *
	 * Call func(arg1, arg2) with the stack pointer set to newsp.
	 * All arguments are read through %ebp, which still addresses
	 * the original stack; "leave" returns to it afterwards.
	 */
	ENTRY_NP(switch_sp_and_call)
	pushl	%ebp
	movl	%esp, %ebp		/* remember the original stack */
	movl	8(%ebp), %esp		/* run on newsp from here on */
	pushl	20(%ebp)		/* arg2 */
	pushl	16(%ebp)		/* arg1 */
	call	*12(%ebp)		/* func(arg1, arg2) */
	addl	$8, %esp		/* discard the arguments */
	leave				/* back to the original stack */
	ret
	SET_SIZE(switch_sp_and_call)

#endif	/* __i386 */
#endif	/* __lint */
4038
#if defined(__lint)

/*
 * Lint-only stand-in for the assembly routines in the !__lint branch.
 */
void
kmdb_enter(void)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void kmdb_enter(void)
	 *
	 * Raise the T_DBGENTR software interrupt with interrupts
	 * disabled, then put the interrupt state back the way it was.
	 */
	ENTRY_NP(kmdb_enter)
	pushq	%rbp
	movq	%rsp, %rbp

	/*
	 * intr_clear() saves the flags, does a 'cli' and returns the
	 * saved flags.
	 */
	call	intr_clear

	int	$T_DBGENTR

	/*
	 * Hand the saved flags to intr_restore().
	 */
	movq	%rax, %rdi
	call	intr_restore

	leave
	ret
	SET_SIZE(kmdb_enter)

#elif defined(__i386)

	/*
	 * void kmdb_enter(void)
	 *
	 * Raise the T_DBGENTR software interrupt with interrupts
	 * disabled, then put the interrupt state back the way it was.
	 */
	ENTRY_NP(kmdb_enter)
	pushl	%ebp
	movl	%esp, %ebp

	/*
	 * intr_clear() saves the flags, does a 'cli' and returns the
	 * saved flags.
	 */
	call	intr_clear

	int	$T_DBGENTR

	/*
	 * Hand the saved flags to intr_restore().
	 */
	pushl	%eax
	call	intr_restore
	addl	$4, %esp		/* discard the argument */

	leave
	ret
	SET_SIZE(kmdb_enter)

#endif	/* __i386 */
#endif	/* __lint */
4096
#if defined(__lint)

/*
 * Lint-only stand-in for the assembly routine in the !__lint branch.
 */
void
return_instr(void)
{
}

#else	/* __lint */

	/*
	 * A routine consisting of nothing but a return.  It is written
	 * as "rep; ret" so that it is a 2-byte instruction when used as
	 * a branch target (AMD Software Optimization Guide -
	 * Section 6.2).
	 */
	ENTRY_NP(return_instr)
	rep;	ret
	SET_SIZE(return_instr)

#endif	/* __lint */
4111
#if defined(__lint)

/*
 * Lint-only stand-in for the assembly routines in the !__lint branch.
 */
ulong_t
getflags(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * ulong_t getflags(void)
	 *
	 * Return the processor flags register in %rax.  In the __xpv
	 * build the PS_IE bit of the result is replaced by one derived
	 * from the upcall mask: set when XEN_TEST_UPCALL_MASK reports
	 * zero, clear otherwise.
	 */
	ENTRY(getflags)
	pushfq
	popq	%rax			/* %rax = rflags */
#if defined(__xpv)
	CURTHREAD(%rdi)
	KPREEMPT_DISABLE(%rdi)
	/*
	 * Synthesize the PS_IE bit from the event mask bit
	 */
	CURVCPU(%r11)
	andq    $_BITNOT(PS_IE), %rax	/* start with PS_IE clear */
	XEN_TEST_UPCALL_MASK(%r11)
	jnz	1f			/* masked: leave PS_IE clear */
	orq	$PS_IE, %rax
1:
	KPREEMPT_ENABLE_NOKP(%rdi)
#endif
	ret
	SET_SIZE(getflags)

#elif defined(__i386)

	/*
	 * ulong_t getflags(void)
	 *
	 * Return the processor flags register in %eax.  In the __xpv
	 * build the PS_IE bit of the result is replaced by one derived
	 * from the upcall mask: set when XEN_TEST_UPCALL_MASK reports
	 * zero, clear otherwise.
	 */
	ENTRY(getflags)
	pushfl
	popl	%eax			/* %eax = eflags */
#if defined(__xpv)
	CURTHREAD(%ecx)
	KPREEMPT_DISABLE(%ecx)
	/*
	 * Synthesize the PS_IE bit from the event mask bit
	 */
	CURVCPU(%edx)
	andl    $_BITNOT(PS_IE), %eax	/* start with PS_IE clear */
	XEN_TEST_UPCALL_MASK(%edx)
	jnz	1f			/* masked: leave PS_IE clear */
	orl	$PS_IE, %eax
1:
	KPREEMPT_ENABLE_NOKP(%ecx)
#endif
	ret
	SET_SIZE(getflags)

#endif	/* __i386 */

#endif	/* __lint */
4169
#if defined(__lint)

/*
 * Lint-only stand-in for the assembly routines in the !__lint branch.
 */
ftrace_icookie_t
ftrace_interrupt_disable(void)
{
	return (0);
}

#else   /* __lint */

#if defined(__amd64)

	/*
	 * ftrace_icookie_t ftrace_interrupt_disable(void)
	 *
	 * Capture the flags register as the return value, then invoke
	 * the CLI macro with %rdx as its register argument.
	 */
	ENTRY(ftrace_interrupt_disable)
	pushfq
	popq	%rax			/* cookie = flags before CLI */
	CLI(%rdx)
	ret
	SET_SIZE(ftrace_interrupt_disable)

#elif defined(__i386)

	/*
	 * ftrace_icookie_t ftrace_interrupt_disable(void)
	 *
	 * Capture the flags register as the return value, then invoke
	 * the CLI macro with %edx as its register argument.
	 */
	ENTRY(ftrace_interrupt_disable)
	pushfl
	popl	%eax			/* cookie = flags before CLI */
	CLI(%edx)
	ret
	SET_SIZE(ftrace_interrupt_disable)

#endif	/* __i386 */
#endif	/* __lint */
4198
#if defined(__lint)

/*
 * Lint-only stand-in for the assembly routines in the !__lint branch.
 */
/*ARGSUSED*/
void
ftrace_interrupt_enable(ftrace_icookie_t cookie)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void ftrace_interrupt_enable(ftrace_icookie_t cookie)
	 *
	 * Load the flags register from cookie (%rdi).
	 */
	ENTRY(ftrace_interrupt_enable)
	pushq	%rdi
	popfq				/* flags = cookie */
	ret
	SET_SIZE(ftrace_interrupt_enable)

#elif defined(__i386)

	/*
	 * void ftrace_interrupt_enable(ftrace_icookie_t cookie)
	 *
	 * Load the flags register from cookie (the stack argument).
	 */
	ENTRY(ftrace_interrupt_enable)
	pushl	4(%esp)			/* copy of cookie on top of the stack */
	popfl				/* flags = cookie */
	ret
	SET_SIZE(ftrace_interrupt_enable)

#endif	/* __i386 */
#endif	/* __lint */
4227
#if defined (__lint)

/*
 * Lint-only stand-in for the assembly routine in the !__lint branch.
 */
/*ARGSUSED*/
void
iommu_cpu_nop(void)
{
}

#else /* __lint */

	/*
	 * void iommu_cpu_nop(void)
	 *
	 * Execute a single pause instruction (the same encoding as
	 * "rep; nop") and return.
	 */
	ENTRY(iommu_cpu_nop)
	pause
	ret
	SET_SIZE(iommu_cpu_nop)

#endif /* __lint */
4243
#if defined (__lint)

/*
 * Lint-only stand-in for the assembly routines in the !__lint branch.
 */
/*ARGSUSED*/
void
clflush_insn(caddr_t addr)
{
}

#else /* __lint */

#if defined (__amd64)

	/*
	 * void clflush_insn(caddr_t addr)
	 *
	 * Execute clflush on the address passed in %rdi.
	 */
	ENTRY(clflush_insn)
	clflush (%rdi)
	ret
	SET_SIZE(clflush_insn)

#elif defined (__i386)

	/*
	 * void clflush_insn(caddr_t addr)
	 *
	 * Execute clflush on the address passed on the stack.
	 */
	ENTRY(clflush_insn)
	movl	4(%esp), %ecx		/* %ecx = addr */
	clflush (%ecx)
	ret
	SET_SIZE(clflush_insn)

#endif /* __i386 */
#endif /* __lint */
4267
#if defined (__lint)

/*
 * Lint-only stand-in for the assembly routine in the !__lint branch.
 */
/*ARGSUSED*/
void
mfence_insn(void)
{
}

#else /* __lint */

#if defined (__amd64) || defined (__i386)

	/*
	 * void mfence_insn(void)
	 *
	 * Execute a single mfence instruction and return.  The code is
	 * the same for amd64 and i386.
	 */
	ENTRY(mfence_insn)
	mfence
	ret
	SET_SIZE(mfence_insn)

#endif /* __amd64 || __i386 */
#endif /* __lint */
4289
/*
 * This is how VMware lets a guest determine that it is running on top
 * of the VMware platform:
 * Write 0xA into the ECX register and put the magic value 0x564D5868
 * into the EAX register.  Then read a 32-bit value from I/O port 0x5658.
 * If VMware is installed, this code executes correctly and the EBX
 * register will then contain the same magic value, 0x564D5868.
 * If VMware is not installed, the OS will take an exception on the
 * port access.
 */
#if defined(__lint)

/*
 * Lint-only stand-in for the assembly routines in the !__lint branch.
 */
int
vmware_platform(void)
{
	return (1);
}

#else

#if defined(__amd64)

	/*
	 * int vmware_platform(void)
	 *
	 * Returns 1 if, after the port read, %ebx holds 0x564d5868,
	 * and 0 otherwise.  %rbx is saved and restored.
	 */
	ENTRY(vmware_platform)
	pushq	%rbx
	xorl	%ebx, %ebx		/* so a stale %ebx cannot match */
	movl	$0x564d5868, %eax
	movl	$0xa, %ecx
	movl	$0x5658, %edx		/* I/O port number */
	inl	(%dx)
	xorl	%eax, %eax
	cmpl	$0x564d5868, %ebx
	sete	%al			/* %eax = (%ebx == 0x564d5868) */
	popq	%rbx
	ret
	SET_SIZE(vmware_platform)

#elif defined(__i386)

	/*
	 * int vmware_platform(void)
	 *
	 * Returns 1 if, after the port read, %ebx holds 0x564d5868,
	 * and 0 otherwise.  %ebx, %ecx and %edx are saved and restored.
	 */
	ENTRY(vmware_platform)
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	xorl	%ebx, %ebx		/* so a stale %ebx cannot match */
	movl	$0x564d5868, %eax
	movl	$0xa, %ecx
	movl	$0x5658, %edx		/* I/O port number */
	inl	(%dx)
	xorl	%eax, %eax
	cmpl	$0x564d5868, %ebx
	sete	%al			/* %eax = (%ebx == 0x564d5868) */
	popl	%edx
	popl	%ecx
	popl	%ebx
	ret
	SET_SIZE(vmware_platform)

#endif /* __i386 */
#endif /* __lint */
4350