xref: /titanic_44/usr/src/uts/intel/ia32/ml/i86_subr.s (revision afbc45417fe46ac092547adfe04ef4966809b862)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 *  Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
29 *  Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T
30 *    All Rights Reserved
31 */
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"
34
35/*
36 * General assembly language routines.
37 * It is the intent of this file to contain routines that are
38 * independent of the specific kernel architecture, and those that are
39 * common across kernel architectures.
40 * As architectures diverge, and implementations of specific
41 * architecture-dependent routines change, the routines should be moved
42 * from this file into the respective ../`arch -k`/subr.s file.
43 */
44
45#include <sys/asm_linkage.h>
46#include <sys/asm_misc.h>
47#include <sys/panic.h>
48#include <sys/ontrap.h>
49#include <sys/regset.h>
50#include <sys/privregs.h>
51#include <sys/reboot.h>
52#include <sys/psw.h>
53#include <sys/x86_archext.h>
54
55#if defined(__lint)
56#include <sys/types.h>
57#include <sys/systm.h>
58#include <sys/thread.h>
59#include <sys/archsystm.h>
60#include <sys/byteorder.h>
61#include <sys/dtrace.h>
62#include <sys/ftrace.h>
63#else	/* __lint */
64#include "assym.h"
65#endif	/* __lint */
66#include <sys/dditypes.h>
67
68/*
69 * on_fault()
70 * Catch lofault faults. Like setjmp except it returns one
71 * if code following causes uncorrectable fault. Turned off
72 * by calling no_fault().
73 */
74
#if defined(__lint)

/* ARGSUSED */
int
on_fault(label_t *ljb)
{ return (0); }

void
no_fault(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_fault(label_t *ljb)		%rdi = ljb
	 *
	 * Record ljb in t_onfault and catch_fault in t_lofault for the
	 * current thread, then tail-jump to setjmp with %rdi untouched so
	 * that setjmp's return value becomes on_fault's return value.
	 */
	ENTRY(on_fault)
	leaq	catch_fault(%rip), %rdx		/* %rdx = &catch_fault */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rdi, T_ONFAULT(%rsi)		/* t_onfault = ljb */
	movq	%rdx, T_LOFAULT(%rsi)		/* t_lofault = catch_fault */
	jmp	setjmp				/* setjmp(ljb) finishes up */

	/*
	 * Target stored in t_lofault above: clear both fields and
	 * longjmp through the jump buffer that on_fault() recorded.
	 */
catch_fault:
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	xorl	%eax, %eax			/* %rax = 0 */
	movq	T_ONFAULT(%rsi), %rdi		/* %rdi = recorded jumpbuf */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = NULL */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = NULL */
	jmp	longjmp				/* longjmp(jumpbuf) */
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Clear t_onfault and t_lofault for the current thread.
	 */
	ENTRY(no_fault)
	xorl	%eax, %eax			/* %rax = 0 */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = NULL */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = NULL */
	ret
	SET_SIZE(no_fault)

#elif defined(__i386)

	/*
	 * int on_fault(label_t *ljb)		4(%esp) = ljb
	 *
	 * Same as the amd64 flavour; the argument stays on the stack so
	 * setjmp finds it at 4(%esp) after the tail-jump.
	 */
	ENTRY(on_fault)
	movl	4(%esp), %eax			/* %eax = ljb */
	leal	catch_fault, %ecx		/* %ecx = &catch_fault */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = ljb */
	movl	%ecx, T_LOFAULT(%edx)		/* t_lofault = catch_fault */
	jmp	setjmp				/* setjmp(ljb) finishes up */

catch_fault:
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONFAULT(%edx), %ecx		/* %ecx = recorded jumpbuf */
	xorl	%eax, %eax			/* %eax = 0 */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = NULL */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = NULL */
	pushl	%ecx				/* argument for longjmp */
	call	longjmp				/* longjmp(jumpbuf) */
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Clear t_onfault and t_lofault for the current thread.
	 */
	ENTRY(no_fault)
	xorl	%eax, %eax			/* %eax = 0 */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = NULL */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = NULL */
	ret
	SET_SIZE(no_fault)

#endif	/* __i386 */
#endif	/* __lint */
144
145/*
146 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  We just
147 * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called.
148 */
149
#if defined(__lint)

void
on_trap_trampoline(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * longjmp(&curthread->t_ontrap->ot_jmpbuf).  The jump is a
	 * tail-jump, so control does not come back here.
	 */
	ENTRY(on_trap_trampoline)
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONTRAP(%rsi), %rdi		/* %rdi = t_ontrap */
	addq	$OT_JMPBUF, %rdi		/* %rdi = &t_ontrap->ot_jmpbuf */
	jmp	longjmp
	SET_SIZE(on_trap_trampoline)

#elif defined(__i386)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * Same as above with the jump buffer address passed on the stack.
	 */
	ENTRY(on_trap_trampoline)
	movl	%gs:CPU_THREAD, %eax		/* %eax = curthread */
	movl	T_ONTRAP(%eax), %eax		/* %eax = t_ontrap */
	addl	$OT_JMPBUF, %eax		/* %eax = &t_ontrap->ot_jmpbuf */
	pushl	%eax				/* argument for longjmp */
	call	longjmp
	SET_SIZE(on_trap_trampoline)

#endif	/* __i386 */
#endif	/* __lint */
179
180/*
181 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
182 * more information about the on_trap() mechanism.  If the on_trap_data is the
183 * same as the topmost stack element, we just modify that element.
184 */
#if defined(__lint)

/*ARGSUSED*/
int
on_trap(on_trap_data_t *otp, uint_t prot)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *	%rdi = otp, %esi = prot
	 *
	 * Initialise *otp, link it on curthread->t_ontrap unless it is
	 * already the topmost element, then tail-jump to setjmp on
	 * &otp->ot_jmpbuf so setjmp's return value is ours.
	 */
	ENTRY(on_trap)
	movw	%si, OT_PROT(%rdi)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%rdi)		/* ot_trap = 0 */
	leaq	on_trap_trampoline(%rip), %rdx	/* rdx = &on_trap_trampoline */
	movq	%rdx, OT_TRAMPOLINE(%rdi)	/* ot_trampoline = rdx */
	xorl	%ecx, %ecx			/* rcx = 0 */
	movq	%rcx, OT_HANDLE(%rdi)		/* ot_handle = NULL */
	movq	%rcx, OT_PAD1(%rdi)		/* ot_pad1 = NULL */
	movq	%gs:CPU_THREAD, %rdx		/* rdx = curthread */
	movq	T_ONTRAP(%rdx), %rcx		/* rcx = curthread->t_ontrap */
	cmpq	%rdi, %rcx			/* if (otp == %rcx)	*/
	je	0f				/*	don't modify t_ontrap */

	movq	%rcx, OT_PREV(%rdi)		/* ot_prev = t_ontrap */
	movq	%rdi, T_ONTRAP(%rdx)		/* curthread->t_ontrap = otp */

0:	addq	$OT_JMPBUF, %rdi		/* &ot_jmpbuf */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#elif defined(__i386)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *	4(%esp) = otp, 8(%esp) = prot
	 *
	 * Same as the amd64 flavour.  The first stack argument is
	 * overwritten with &otp->ot_jmpbuf before the tail-jump so that
	 * setjmp picks it up as its own argument.
	 */
	ENTRY(on_trap)
	movl	4(%esp), %eax			/* %eax = otp */
	movl	8(%esp), %edx			/* %edx = prot */

	movw	%dx, OT_PROT(%eax)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%eax)		/* ot_trap = 0 */
	leal	on_trap_trampoline, %edx	/* %edx = &on_trap_trampoline */
	movl	%edx, OT_TRAMPOLINE(%eax)	/* ot_trampoline = %edx */
	movl	$0, OT_HANDLE(%eax)		/* ot_handle = NULL */
	movl	$0, OT_PAD1(%eax)		/* ot_pad1 = NULL */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONTRAP(%edx), %ecx		/* %ecx = curthread->t_ontrap */
	cmpl	%eax, %ecx			/* if (otp == %ecx) */
	je	0f				/*    don't modify t_ontrap */

	movl	%ecx, OT_PREV(%eax)		/* ot_prev = t_ontrap */
	movl	%eax, T_ONTRAP(%edx)		/* curthread->t_ontrap = otp */

0:	addl	$OT_JMPBUF, %eax		/* %eax = &ot_jmpbuf */
	movl	%eax, 4(%esp)			/* put %eax back on the stack */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#endif	/* __i386 */
#endif	/* __lint */
243
244/*
245 * Setjmp and longjmp implement non-local gotos using state vectors
246 * type label_t.
247 */
248
#if defined(__lint)

/* ARGSUSED */
int
setjmp(label_t *lp)
{ return (0); }

/* ARGSUSED */
void
longjmp(label_t *lp)
{}

#else	/* __lint */

#if LABEL_PC != 0
#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
#endif	/* LABEL_PC != 0 */

#if defined(__amd64)

	/*
	 * int setjmp(label_t *lp)		%rdi = lp
	 *
	 * Store the return address, %rsp and %rbp/%rbx/%r12-%r15 into *lp
	 * and return 0.
	 */
	ENTRY(setjmp)
	movq	(%rsp), %rdx		/* %rdx = our return address */
	movq	%rdx, (%rdi)		/* saved pc; LABEL_PC is 0 */
	movq	%rsp, LABEL_SP(%rdi)
	movq	%rbp, LABEL_RBP(%rdi)
	movq	%rbx, LABEL_RBX(%rdi)
	movq	%r12, LABEL_R12(%rdi)
	movq	%r13, LABEL_R13(%rdi)
	movq	%r14, LABEL_R14(%rdi)
	movq	%r15, LABEL_R15(%rdi)
	xorl	%eax, %eax		/* return (0) */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)		%rdi = lp
	 *
	 * Reload the registers saved by setjmp, plant the saved pc in the
	 * return-address slot of the restored stack and return 1 to it.
	 */
	ENTRY(longjmp)
	movq	(%rdi), %rdx		/* %rdx = saved pc; LABEL_PC is 0 */
	movq	LABEL_R15(%rdi), %r15
	movq	LABEL_R14(%rdi), %r14
	movq	LABEL_R13(%rdi), %r13
	movq	LABEL_R12(%rdi), %r12
	movq	LABEL_RBX(%rdi), %rbx
	movq	LABEL_RBP(%rdi), %rbp
	movq	LABEL_SP(%rdi), %rsp
	movq	%rdx, (%rsp)		/* becomes the address ret uses */
	xorl	%eax, %eax
	incl	%eax			/* return (1) */
	ret
	SET_SIZE(longjmp)

#elif defined(__i386)

	/*
	 * int setjmp(label_t *lp)		4(%esp) = lp
	 *
	 * Store the return address, %esp and %ebp/%ebx/%esi/%edi into *lp
	 * and return 0.
	 */
	ENTRY(setjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	(%esp), %ecx		/* %ecx = our return address */
	movl	%ecx, (%edx)		/* saved pc; LABEL_PC is 0 */
	movl	%esp, 4(%edx)		/* saved %esp lives at offset 4 */
	movl	%ebp, LABEL_EBP(%edx)
	movl	%ebx, LABEL_EBX(%edx)
	movl	%esi, LABEL_ESI(%edx)
	movl	%edi, LABEL_EDI(%edx)
	subl	%eax, %eax		/* return (0) */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)		4(%esp) = lp
	 *
	 * Reload the registers saved by setjmp, drop the return-address
	 * slot from the restored stack and jump to the saved pc with
	 * %eax = 1.
	 */
	ENTRY(longjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	(%edx), %ecx		/* %ecx = saved pc; LABEL_PC is 0 */
	movl	LABEL_EDI(%edx), %edi
	movl	LABEL_ESI(%edx), %esi
	movl	LABEL_EBX(%edx), %ebx
	movl	LABEL_EBP(%edx), %ebp
	movl	4(%edx), %esp		/* saved %esp lives at offset 4 */
	movl	$1, %eax		/* return (1) */
	addl	$4, %esp		/* discard the return-address slot */
	jmp	*%ecx			/* resume at the saved pc */
	SET_SIZE(longjmp)

#endif	/* __i386 */
#endif	/* __lint */
328
329/*
330 * if a() calls b() calls caller(),
331 * caller() returns return address in a().
332 * (Note: We assume a() and b() are C routines which do the normal entry/exit
333 *  sequence.)
334 */
335
#if defined(__lint)

caddr_t
caller(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t caller(void)
	 *
	 * This routine builds no frame of its own, so %rbp is still the
	 * frame pointer of whoever called us; the word just above it is
	 * that routine's return address.
	 */
	ENTRY(caller)
	movq	8(%rbp), %rax		/* return pc stored above saved %rbp */
	ret
	SET_SIZE(caller)

#elif defined(__i386)

	/*
	 * caddr_t caller(void)
	 *
	 * Same as above with 4-byte stack slots.
	 */
	ENTRY(caller)
	movl	4(%ebp), %eax		/* return pc stored above saved %ebp */
	ret
	SET_SIZE(caller)

#endif	/* __i386 */
#endif	/* __lint */
360
361/*
362 * if a() calls callee(), callee() returns the
363 * return address in a();
364 */
365
#if defined(__lint)

caddr_t
callee(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t callee(void)
	 *
	 * Return our own return address, read from the top of the stack.
	 */
	ENTRY(callee)
	movq	(%rsp), %rax		/* %rax = our return pc */
	ret
	SET_SIZE(callee)

#elif defined(__i386)

	/*
	 * caddr_t callee(void)
	 *
	 * Return our own return address, read from the top of the stack.
	 */
	ENTRY(callee)
	movl	(%esp), %eax		/* %eax = our return pc */
	ret
	SET_SIZE(callee)

#endif	/* __i386 */
#endif	/* __lint */
390
391/*
392 * return the current frame pointer
393 */
394
#if defined(__lint)

greg_t
getfp(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * greg_t getfp(void)
	 *
	 * Return the value %rbp holds on entry.
	 */
	ENTRY(getfp)
	movq	%rbp, %rax		/* %rax = %rbp */
	ret
	SET_SIZE(getfp)

#elif defined(__i386)

	/*
	 * greg_t getfp(void)
	 *
	 * Return the value %ebp holds on entry.
	 */
	ENTRY(getfp)
	movl	%ebp, %eax		/* %eax = %ebp */
	ret
	SET_SIZE(getfp)

#endif	/* __i386 */
#endif	/* __lint */
419
420/*
421 * Invalidate a single page table entry in the TLB
422 */
423
#if defined(__lint)

/* ARGSUSED */
void
mmu_tlbflush_entry(caddr_t m)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)		%rdi = m
	 *
	 * Issue invlpg for the address m.
	 */
	ENTRY(mmu_tlbflush_entry)
	invlpg	(%rdi)
	ret
	SET_SIZE(mmu_tlbflush_entry)

#elif defined(__i386)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)		4(%esp) = m
	 *
	 * Issue invlpg for the address m.
	 */
	ENTRY(mmu_tlbflush_entry)
	movl	4(%esp), %eax		/* %eax = m */
	invlpg	(%eax)
	ret
	SET_SIZE(mmu_tlbflush_entry)

#endif	/* __i386 */
#endif	/* __lint */
450
451
452/*
453 * Get/Set the value of various control registers
454 */
455
#if defined(__lint)

ulong_t
getcr0(void)
{ return (0); }

/* ARGSUSED */
void
setcr0(ulong_t value)
{}

ulong_t
getcr2(void)
{ return (0); }

ulong_t
getcr3(void)
{ return (0); }

#if !defined(__xpv)
/* ARGSUSED */
void
setcr3(ulong_t val)
{}

void
reload_cr3(void)
{}
#endif

ulong_t
getcr4(void)
{ return (0); }

/* ARGSUSED */
void
setcr4(ulong_t val)
{}

#if defined(__amd64)

ulong_t
getcr8(void)
{ return (0); }

/* ARGSUSED */
void
setcr8(ulong_t val)
{}

#endif	/* __amd64 */

#else	/* __lint */

#if defined(__amd64)

	/*
	 * amd64 accessors: getters return the register in %rax, setters
	 * take the new value in %rdi.
	 */

	ENTRY(getcr0)
	movq	%cr0, %rax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movq	%rdi, %cr0
	ret
	SET_SIZE(setcr0)

	/*
	 * Under __xpv the value is read from the vcpu_info structure
	 * referenced by the per-CPU area rather than from %cr2 itself.
	 */
	ENTRY(getcr2)
#if defined(__xpv)
	movq	%gs:CPU_VCPU_INFO, %rax
	movq	VCPU_INFO_ARCH_CR2(%rax), %rax
#else
	movq	%cr2, %rax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movq	%cr3, %rax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movq	%rdi, %cr3
	ret
	SET_SIZE(setcr3)

	/* Write %cr3 back with the value it already holds. */
	ENTRY(reload_cr3)
	movq	%cr3, %rdi
	movq	%rdi, %cr3
	ret
	SET_SIZE(reload_cr3)

#endif	/* __xpv */

	ENTRY(getcr4)
	movq	%cr4, %rax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movq	%rdi, %cr4
	ret
	SET_SIZE(setcr4)

	ENTRY(getcr8)
	movq	%cr8, %rax
	ret
	SET_SIZE(getcr8)

	ENTRY(setcr8)
	movq	%rdi, %cr8
	ret
	SET_SIZE(setcr8)

#elif defined(__i386)

	/*
	 * i386 accessors: getters return the register in %eax, setters
	 * take the new value at 4(%esp).
	 */

	ENTRY(getcr0)
	movl	%cr0, %eax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movl	4(%esp), %eax
	movl	%eax, %cr0
	ret
	SET_SIZE(setcr0)

	/*
	 * Under __xpv the value is read from the vcpu_info structure
	 * referenced by the per-CPU area rather than from %cr2 itself.
	 */
	ENTRY(getcr2)
#if defined(__xpv)
	movl	%gs:CPU_VCPU_INFO, %eax
	movl	VCPU_INFO_ARCH_CR2(%eax), %eax
#else
	movl	%cr2, %eax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movl	%cr3, %eax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movl	4(%esp), %eax
	movl	%eax, %cr3
	ret
	SET_SIZE(setcr3)

	/* Write %cr3 back with the value it already holds. */
	ENTRY(reload_cr3)
	movl	%cr3, %eax
	movl	%eax, %cr3
	ret
	SET_SIZE(reload_cr3)

#endif	/* __xpv */

	ENTRY(getcr4)
	movl	%cr4, %eax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movl	4(%esp), %eax
	movl	%eax, %cr4
	ret
	SET_SIZE(setcr4)

#endif	/* __i386 */
#endif	/* __lint */
629
#if defined(__lint)

/*ARGSUSED*/
uint32_t
__cpuid_insn(struct cpuid_regs *regs)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint32_t __cpuid_insn(struct cpuid_regs *regs)	%rdi = regs
	 *
	 * Load %eax/%ebx/%ecx/%edx from the four 32-bit slots of *regs,
	 * execute cpuid, and store the four results back in the same
	 * slots.  %eax still holds the cpuid %eax result on return.
	 * %rbx, %rcx and %rdx are preserved via %r8, %r9 and %r11.
	 */
	ENTRY(__cpuid_insn)
	movq	%rdx, %r11		/* stash %rdx */
	movq	%rcx, %r9		/* stash %rcx */
	movq	%rbx, %r8		/* stash %rbx */
	movl	0xc(%rdi), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%rdi), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%rdi), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%rdi), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%edx, 0xc(%rdi)		/* regs->cp_edx = %edx */
	movl	%ecx, 0x8(%rdi)		/* regs->cp_ecx = %ecx */
	movl	%ebx, 0x4(%rdi)		/* regs->cp_ebx = %ebx */
	movl	%eax, (%rdi)		/* regs->cp_eax = %eax */
	movq	%r11, %rdx		/* put %rdx back */
	movq	%r9, %rcx		/* put %rcx back */
	movq	%r8, %rbx		/* put %rbx back */
	ret
	SET_SIZE(__cpuid_insn)

#elif defined(__i386)

	/*
	 * uint32_t __cpuid_insn(struct cpuid_regs *regs)
	 *
	 * Same as above; %ebp is used as the pointer to *regs, and
	 * %ebp, %ebx, %ecx and %edx are preserved on the stack.
	 */
	ENTRY(__cpuid_insn)
	pushl	%ebp
	movl	0x8(%esp), %ebp		/* %ebp = regs (arg is now 8 up) */
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	movl	0xc(%ebp), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%ebp), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%ebp), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%ebp), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%edx, 0xc(%ebp)		/* regs->cp_edx = %edx */
	movl	%ecx, 0x8(%ebp)		/* regs->cp_ecx = %ecx */
	movl	%ebx, 0x4(%ebp)		/* regs->cp_ebx = %ebx */
	movl	%eax, (%ebp)		/* regs->cp_eax = %eax */
	popl	%edx
	popl	%ecx
	popl	%ebx
	popl	%ebp
	ret
	SET_SIZE(__cpuid_insn)

#endif	/* __i386 */
#endif	/* __lint */
686
687#if defined(__xpv)
688	/*
689	 * Defined in C
690	 */
691#else
692
#if defined(__lint)

/*ARGSUSED*/
void
i86_monitor(volatile uint32_t *addr, uint32_t extensions, uint32_t hints)
{ return; }

#else   /* __lint */

#if defined(__amd64)

	/*
	 * void i86_monitor(volatile uint32_t *addr, uint32_t extensions,
	 *     uint32_t hints)
	 *	%rdi = addr, %rsi = extensions, %rdx = hints
	 *
	 * Execute MONITOR (hand-encoded) with addr in %rax, extensions
	 * in %rcx and hints left where they arrived, in %rdx.
	 */
	ENTRY_NP(i86_monitor)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rsi, %rcx		/* extensions */
	movq	%rdi, %rax		/* addr */
	/* %rdx already holds arg3: hints */
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

#elif defined(__i386)

	/*
	 * Same as above with all three arguments fetched from the stack.
	 */
	ENTRY_NP(i86_monitor)
	pushl	%ebp
	movl	%esp, %ebp
	movl	0x10(%ebp), %edx	/* hints */
	movl	0xc(%ebp), %ecx		/* extensions */
	movl	0x8(%ebp), %eax		/* addr */
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

#endif	/* __i386 */
#endif	/* __lint */
730
#if defined(__lint)

/*ARGSUSED*/
void
i86_mwait(uint32_t data, uint32_t extensions)
{ return; }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void i86_mwait(uint32_t data, uint32_t extensions)
	 *	%rdi = data, %rsi = extensions
	 *
	 * Execute MWAIT (hand-encoded) with data in %rax and extensions
	 * in %rcx.
	 */
	ENTRY_NP(i86_mwait)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rsi, %rcx		/* extensions */
	movq	%rdi, %rax		/* data */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#elif defined(__i386)

	/*
	 * Same as above with both arguments fetched from the stack.
	 */
	ENTRY_NP(i86_mwait)
	pushl	%ebp
	movl	%esp, %ebp
	movl	0xc(%ebp), %ecx		/* extensions */
	movl	0x8(%ebp), %eax		/* data */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#endif	/* __i386 */
#endif	/* __lint */
766
#if defined(__lint)

hrtime_t
tsc_read(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * hrtime_t tsc_read(void)
	 *
	 * Default body: run cpuid (leaf 0) ahead of rdtsc, then return
	 * (%rdx << 32) | %rax.  %rbx, which cpuid overwrites, is kept in
	 * %r11 across the sequence.
	 *
	 * The labelled _*_start/_*_end ranges that follow are alternative
	 * bodies placed inside tsc_read's SET_SIZE extent.
	 * NOTE(review): presumably startup code copies one of them over
	 * the default body once CPU features are known; confirm against
	 * the users of these symbols before changing any instruction
	 * (and therefore any size) in this block.
	 */
	ENTRY_NP(tsc_read)
	movq	%rbx, %r11		/* cpuid clobbers %rbx */
	movl	$0, %eax		/* cpuid leaf 0 */
	cpuid
	rdtsc
	movq	%r11, %rbx
	shlq	$32, %rdx
	orq	%rdx, %rax		/* %rax = %edx:%eax */
	ret

	/* Variant: mfence ahead of rdtsc. */
	.globl _tsc_mfence_start
	.globl _tsc_mfence_end
_tsc_mfence_start:
	mfence
	rdtsc
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
_tsc_mfence_end:

	/* Variant: rdtscp, hand-encoded. */
	.globl _tscp_start
	.globl _tscp_end
_tscp_start:
	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
_tscp_end:

	/* Variant: no TSC read at all; always returns 0. */
	.globl _no_rdtsc_start
	.globl _no_rdtsc_end
_no_rdtsc_start:
	xorl	%edx, %edx
	xorl	%eax, %eax
	ret
_no_rdtsc_end:

	/* Variant: lfence ahead of rdtsc. */
	.globl _tsc_lfence_start
	.globl _tsc_lfence_end
_tsc_lfence_start:
	lfence
	rdtsc
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
_tsc_lfence_end:
	SET_SIZE(tsc_read)

#else /* __i386 */

	/*
	 * hrtime_t tsc_read(void)
	 *
	 * i386 flavour: the 64-bit result is returned in %edx:%eax as
	 * rdtsc leaves it, so no shift/or is needed.  %ebx, which cpuid
	 * overwrites, is preserved on the stack.  The same review note as
	 * for the amd64 flavour applies to the alternative bodies below.
	 */
	ENTRY_NP(tsc_read)
	pushl	%ebx			/* cpuid clobbers %ebx */
	movl	$0, %eax		/* cpuid leaf 0 */
	cpuid
	rdtsc
	popl	%ebx
	ret

	/* Variant: mfence ahead of rdtsc. */
	.globl _tsc_mfence_start
	.globl _tsc_mfence_end
_tsc_mfence_start:
	mfence
	rdtsc
	ret
_tsc_mfence_end:

	/* Variant: rdtscp, hand-encoded. */
	.globl	_tscp_start
	.globl _tscp_end
_tscp_start:
	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
	ret
_tscp_end:

	/* Variant: no TSC read at all; always returns 0. */
	.globl _no_rdtsc_start
	.globl _no_rdtsc_end
_no_rdtsc_start:
	xorl	%edx, %edx
	xorl	%eax, %eax
	ret
_no_rdtsc_end:

	/* Variant: lfence ahead of rdtsc. */
	.globl _tsc_lfence_start
	.globl _tsc_lfence_end
_tsc_lfence_start:
	lfence
	rdtsc
	ret
_tsc_lfence_end:
	SET_SIZE(tsc_read)

#endif	/* __i386 */

#endif	/* __lint */
864
865#endif	/* __xpv */
866
867/*
868 * Insert entryp after predp in a doubly linked list.
869 */
870
#if defined(__lint)

/*ARGSUSED*/
void
_insque(caddr_t entryp, caddr_t predp)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *	%rdi = entryp, %rsi = predp
	 *
	 * Each element starts with a forward pointer at offset 0 and has
	 * its back pointer at offset CPTRSIZE.  The old successor of
	 * predp is held in %rax throughout.
	 */
	ENTRY(_insque)
	movq	(%rsi), %rax		/* %rax = predp->forw		*/
	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp		*/
	movq	%rax, (%rdi)		/* entryp->forw = old successor	*/
	movq	%rdi, (%rsi)		/* predp->forw = entryp		*/
	movq	%rdi, CPTRSIZE(%rax)	/* old successor->back = entryp	*/
	ret
	SET_SIZE(_insque)

#elif defined(__i386)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *	4(%esp) = entryp, 8(%esp) = predp
	 *
	 * Same layout and steps as the amd64 flavour; %ecx = entryp,
	 * %edx = predp, %eax = old successor of predp.
	 */
	ENTRY(_insque)
	movl	4(%esp), %ecx		/* %ecx = entryp		*/
	movl	8(%esp), %edx		/* %edx = predp			*/
	movl	(%edx), %eax		/* %eax = predp->forw		*/
	movl	%edx, CPTRSIZE(%ecx)	/* entryp->back = predp		*/
	movl	%eax, (%ecx)		/* entryp->forw = old successor	*/
	movl	%ecx, (%edx)		/* predp->forw = entryp		*/
	movl	%ecx, CPTRSIZE(%eax)	/* old successor->back = entryp	*/
	ret
	SET_SIZE(_insque)

#endif	/* __i386 */
#endif	/* __lint */
906
907/*
908 * Remove entryp from a doubly linked list
909 */
910
#if defined(__lint)

/*ARGSUSED*/
void
_remque(caddr_t entryp)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _remque(caddr_t entryp)		%rdi = entryp
	 *
	 * Unlink entryp by pointing its neighbours at each other.  The
	 * forward pointer is at offset 0, the back pointer at CPTRSIZE.
	 * entryp's own pointers are left as they were.
	 */
	ENTRY(_remque)
	movq	CPTRSIZE(%rdi), %rdx	/* %rdx = entry->back */
	movq	(%rdi), %rax		/* %rax = entry->forw */
	movq	%rax, (%rdx)		/* entry->back->forw = entry->forw */
	movq	%rdx, CPTRSIZE(%rax)	/* entry->forw->back = entry->back */
	ret
	SET_SIZE(_remque)

#elif defined(__i386)

	/*
	 * void _remque(caddr_t entryp)		4(%esp) = entryp
	 *
	 * Same steps as the amd64 flavour.
	 */
	ENTRY(_remque)
	movl	4(%esp), %ecx		/* %ecx = entryp */
	movl	CPTRSIZE(%ecx), %edx	/* %edx = entry->back */
	movl	(%ecx), %eax		/* %eax = entry->forw */
	movl	%eax, (%edx)		/* entry->back->forw = entry->forw */
	movl	%edx, CPTRSIZE(%eax)	/* entry->forw->back = entry->back */
	ret
	SET_SIZE(_remque)

#endif	/* __i386 */
#endif	/* __lint */
943
944/*
 * Returns the number of bytes in the string argument,
 * not counting the terminating NUL byte.
947 */
948
#if defined(__lint)

/* ARGSUSED */
size_t
strlen(const char *str)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

/*
 * Byte-at-a-time scan, equivalent to the following C.  It should either
 * simply become C, or earn its place in assembler by being made
 * significantly faster.
 *
 * size_t
 * strlen(const char *s)
 * {
 *	const char *s0;
 * #if defined(DEBUG)
 *	if ((uintptr_t)s < postbootkernelbase)
 *		panic("strlen: argument below kernelbase");
 * #endif
 *	for (s0 = s; *s; s++)
 *		;
 *	return (s - s0);
 * }
 */

	ENTRY(strlen)
#ifdef DEBUG
	movq	postbootkernelbase(%rip), %rax
	cmpq	%rax, %rdi		/* s >= postbootkernelbase? */
	jae	str_valid
	pushq	%rbp			/* build a frame for panic() */
	movq	%rsp, %rbp
	leaq	.str_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
#endif	/* DEBUG */
str_valid:
	movq	%rdi, %rax		/* %rax = s0 (start of string) */
	cmpb	$0, (%rdi)		/* empty string? */
	je	.null_found
	.align	4
.strlen_loop:
	incq	%rdi			/* s++ */
	cmpb	$0, (%rdi)
	jne	.strlen_loop
.null_found:
	subq	%rax, %rdi		/* %rdi = s - s0 */
	movq	%rdi, %rax		/* return (s - s0) */
	ret
	SET_SIZE(strlen)

#elif defined(__i386)

	/*
	 * size_t strlen(const char *str)	4(%esp) = str
	 *
	 * Bytes are examined one at a time until the pointer is 4-byte
	 * aligned; after that whole words are tested with the
	 * 0x7f7f7f7f / 0x80808080 mask sequence below, and a word that
	 * fails the test is re-scanned bytewise to locate the NUL.
	 */
	ENTRY(strlen)
#ifdef DEBUG
	movl	postbootkernelbase, %eax
	cmpl	%eax, 4(%esp)		/* str >= postbootkernelbase? */
	jae	str_valid
	pushl	%ebp			/* build a frame for panic() */
	movl	%esp, %ebp
	pushl	$.str_panic_msg
	call	panic
#endif /* DEBUG */

str_valid:
	movl	4(%esp), %eax		/* %eax = current position */
	testl	$3, %eax		/* aligned already? */
	jnz	.not_word_aligned	/* no: go bytewise first */
	.align	4
.word_aligned:
	movl	(%eax), %edx		/* %edx = next 4 bytes */
	movl	$0x7f7f7f7f, %ecx
	andl	%edx, %ecx		/* %ecx = %edx & 0x7f7f7f7f */
	addl	$4, %eax		/* advance one word */
	addl	$0x7f7f7f7f, %ecx	/* %ecx += 0x7f7f7f7f */
	orl	%edx, %ecx		/* %ecx |= %edx */
	andl	$0x80808080, %ecx	/* keep the top bit of each byte */
	cmpl	$0x80808080, %ecx	/* all four set: no zero byte */
	je	.word_aligned		/* so keep going wordwise */
	subl	$4, %eax		/* back up to the failing word */
.not_word_aligned:
	cmpb	$0, (%eax)		/* is this byte the NUL? */
	je	.null_found
	incl	%eax			/* next byte */
	testl	$3, %eax		/* reached a word boundary? */
	jnz	.not_word_aligned	/* not yet */
	jmp	.word_aligned		/* yes: switch to word scan */
	.align	4
.null_found:
	subl	4(%esp), %eax		/* return (position - str) */
	ret
	SET_SIZE(strlen)

#endif	/* __i386 */

#ifdef DEBUG
	.text
.str_panic_msg:
	.string "strlen: argument below kernelbase"
#endif /* DEBUG */

#endif	/* __lint */
1056
1057	/*
	 * Berkeley 4.3 introduced symbolically named interrupt levels
	 * as a way to deal with priority in a machine independent fashion.
1060	 * Numbered priorities are machine specific, and should be
1061	 * discouraged where possible.
1062	 *
	 * Note, for the machine specific priorities there are
	 * examples listed for devices that use a particular priority.
	 * It should not be construed that all devices of that
	 * type should be at that priority.  It is currently where
	 * the current devices fit into the priority scheme based
	 * upon time criticality.
1069	 *
1070	 * The underlying assumption of these assignments is that
1071	 * IPL 10 is the highest level from which a device
1072	 * routine can call wakeup.  Devices that interrupt from higher
	 * levels are restricted in what they can do.  If they need
	 * kernel services they should schedule a routine at a lower
	 * level (via software interrupt) to do the required
	 * processing.
1077	 *
1078	 * Examples of this higher usage:
1079	 *	Level	Usage
1080	 *	14	Profiling clock (and PROM uart polling clock)
1081	 *	12	Serial ports
1082	 *
1083	 * The serial ports request lower level processing on level 6.
1084	 *
1085	 * Also, almost all splN routines (where N is a number or a
1086	 * mnemonic) will do a RAISE(), on the assumption that they are
1087	 * never used to lower our priority.
1088	 * The exceptions are:
1089	 *	spl8()		Because you can't be above 15 to begin with!
1090	 *	splzs()		Because this is used at boot time to lower our
1091	 *			priority, to allow the PROM to poll the uart.
1092	 *	spl0()		Used to lower priority to 0.
1093	 */
1094
#if defined(__lint)

int spl0(void)		{ return (0); }
int spl6(void)		{ return (0); }
int spl7(void)		{ return (0); }
int spl8(void)		{ return (0); }
int splhigh(void)	{ return (0); }
int splhi(void)		{ return (0); }
int splzs(void)		{ return (0); }

/* ARGSUSED */
void
splx(int level)
{}

#else	/* __lint */

/*
 * SETPRI(level) hands the level to do_splx; RAISE(level) hands it to
 * splr.  On amd64 the level goes in %edi and the helper is reached by a
 * tail-jump; on i386 the level is pushed, the helper is called, and the
 * argument is popped before returning.
 */

#if defined(__amd64)

#define	SETPRI(level) \
	movl	$/**/level, %edi;	/* new priority */		\
	jmp	do_splx			/* redirect to do_splx */

#define	RAISE(level) \
	movl	$/**/level, %edi;	/* new priority */		\
	jmp	splr			/* redirect to splr */

#elif defined(__i386)

#define	SETPRI(level) \
	pushl	$/**/level;	/* new priority */			\
	call	do_splx;	/* invoke common splx code */		\
	addl	$4, %esp;	/* unstack arg */			\
	ret

#define	RAISE(level) \
	pushl	$/**/level;	/* new priority */			\
	call	splr;		/* invoke common splr code */		\
	addl	$4, %esp;	/* unstack args */			\
	ret

#endif	/* __i386 */

	/* spl8: level 15 - locks out all interrupts, incl. memory errors */
	ENTRY(spl8)
	SETPRI(15)
	SET_SIZE(spl8)

	/* spl7: level 13 - just below the level that profiling runs */
	ENTRY(spl7)
	RAISE(13)
	SET_SIZE(spl7)

	/*
	 * splzs: level 12 - sun specific, highest priority onboard serial
	 * i/o asy ports.  Uses SETPRI rather than RAISE because it is also
	 * used to lower the priority.
	 */
	ENTRY(splzs)
	SETPRI(12)
	SET_SIZE(splzs)

	/* splhi and its aliases: raise to DISP_LEVEL */
	ENTRY(splhi)
	ALTENTRY(splhigh)
	ALTENTRY(spl6)
	ALTENTRY(i_ddi_splhigh)

	RAISE(DISP_LEVEL)

	SET_SIZE(i_ddi_splhigh)
	SET_SIZE(spl6)
	SET_SIZE(splhigh)
	SET_SIZE(splhi)

	/* spl0: level 0 - allow all interrupts */
	ENTRY(spl0)
	SETPRI(0)
	SET_SIZE(spl0)


	/* splx implementation: the argument is passed straight through */
	ENTRY(splx)
	jmp	do_splx		/* redirect to common splx code */
	SET_SIZE(splx)

#endif	/* __lint */
1177
#if defined(__i386)

/*
 * Accessors for the %gs segment register (i386 only).
 */

#if defined(__lint)

/*ARGSUSED*/
uint16_t
getgs(void)
{ return (0); }

/*ARGSUSED*/
void
setgs(uint16_t sel)
{}

#else	/* __lint */

	/*
	 * uint16_t getgs(void)
	 *
	 * Return the %gs selector in %ax; %eax is cleared first so the
	 * upper half of the return register is zero.
	 */
	ENTRY(getgs)
	clr	%eax
	movw	%gs, %ax
	ret
	SET_SIZE(getgs)

	/*
	 * void setgs(uint16_t sel)		4(%esp) = sel
	 *
	 * Load %gs straight from the stack argument.
	 */
	ENTRY(setgs)
	movw	4(%esp), %gs
	ret
	SET_SIZE(setgs)

#endif	/* __lint */
#endif	/* __i386 */
1211
1212#if defined(__lint)
1213
1214void
1215pc_reset(void)
1216{}
1217
1218void
1219efi_reset(void)
1220{}
1221
1222#else	/* __lint */
1223
1224	ENTRY(wait_500ms)
1225	push	%ebx
1226	movl	$50000, %ebx
12271:
1228	call	tenmicrosec
1229	decl	%ebx
1230	jnz	1b
1231	pop	%ebx
1232	ret
1233	SET_SIZE(wait_500ms)
1234
/* Bits in pc_reset_methods selecting which reset methods pc_reset tries */
#define	RESET_METHOD_KBC	1
#define	RESET_METHOD_PORT92	2
#define	RESET_METHOD_PCI	4

	/* All three methods are enabled by default. */
	DGDEF3(pc_reset_methods, 4, 8)
	.long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI;

	/*
	 * void pc_reset(void)
	 *
	 * Try each reset method whose bit is set in pc_reset_methods, in
	 * the order keyboard controller, port 0x92, port 0xcf9, pausing
	 * 500ms after each attempt, and finally fall through into
	 * efi_reset.
	 */
	ENTRY(pc_reset)

#if defined(__i386)
	testl	$RESET_METHOD_KBC, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_KBC, pc_reset_methods(%rip)
#endif
	jz	1f

	/*
	 * Try the classic keyboard controller-triggered reset.
	 */
	movb	$0xfe, %al
	movw	$0x64, %dx
	outb	(%dx)

	/*
	 * Wait up to 500 milliseconds here for the keyboard controller
	 * to pull the reset line.  On some systems where the keyboard
	 * controller is slow to pull the reset line, the next reset method
	 * may be executed (which may be bad if those systems hang when the
	 * next reset method is used, e.g. Ferrari 3400 (doesn't like port
	 * 92), and Ferrari 4000 (doesn't like the cf9 reset method))
	 */
	call	wait_500ms

1:
#if defined(__i386)
	testl	$RESET_METHOD_PORT92, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_PORT92, pc_reset_methods(%rip)
#endif
	jz	3f

	/*
	 * Try port 0x92 fast reset
	 */
	movw	$0x92, %dx
	inb	(%dx)
	cmpb	$0xff, %al	/* If port's not there, we get back 0xFF */
	je	1f
	testb	$1, %al		/* If bit 0 */
	jz	2f		/* is clear, jump to perform the reset */
	andb	$0xfe, %al	/* otherwise, */
	outb	(%dx)		/* clear bit 0 first, then */
2:
	orb	$1, %al		/* Set bit 0 */
	outb	(%dx)		/* and reset the system */
1:

	call	wait_500ms

3:
#if defined(__i386)
	testl	$RESET_METHOD_PCI, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_PCI, pc_reset_methods(%rip)
#endif
	jz	4f

	/*
	 * Try the PCI (soft) reset vector (should work on all modern
	 * systems, but has been shown to cause problems on 450NX systems,
	 * and some newer systems (e.g. ATI IXP400-equipped systems))
	 * When resetting via this method, 2 writes are required.  The
	 * first targets bit 1 (0=hard reset without power cycle, 1=hard
	 * reset with power cycle).
	 * The reset occurs on the second write, during bit 2's transition
	 * from 0->1.
	 */
	movw	$0xcf9, %dx
	movb	$0x2, %al	/* Reset mode = hard, no power cycle */
	outb	(%dx)
	movb	$0x6, %al
	outb	(%dx)

	call	wait_500ms

4:
	/*
	 * port 0xcf9 failed also.  Last-ditch effort is to
	 * triple-fault the CPU.
	 * Also, use triple fault for EFI firmware
	 */
	ENTRY(efi_reset)
#if defined(__amd64)
	pushq	$0x0
	pushq	$0x0		/* IDT base of 0, limit of 0 + 2 unused bytes */
	lidt	(%rsp)
#elif defined(__i386)
	pushl	$0x0
	pushl	$0x0		/* IDT base of 0, limit of 0 + 2 unused bytes */
	lidt	(%esp)
#endif
	int	$0x0		/* Trigger interrupt, generate triple-fault */

	cli
	hlt			/* Wait forever */
	/*NOTREACHED*/
	SET_SIZE(efi_reset)
	SET_SIZE(pc_reset)
1340
1341#endif	/* __lint */
1342
1343/*
1344 * C callable in and out routines
1345 */
1346
#if defined(__lint)

/* ARGSUSED */
void
outl(int port_address, uint32_t val)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void outl(int port_address, uint32_t val)
	 *	%edi = port_address, %esi = val
	 *
	 * Write the 32-bit val to the I/O port in the low 16 bits of
	 * port_address.
	 */
	ENTRY(outl)
	movl	%esi, %eax		/* %eax = val */
	movw	%di, %dx		/* %dx = port */
	outl	(%dx)
	ret
	SET_SIZE(outl)

#elif defined(__i386)

	/*
	 * Stack offsets of the two arguments on entry.  The i386 in/out
	 * routines that follow use these symbols as well.
	 */
	.set	PORT, 4
	.set	VAL, 8

	/*
	 * void outl(int port_address, uint32_t val)
	 *
	 * Write the 32-bit val to the I/O port in the low 16 bits of
	 * port_address.
	 */
	ENTRY(outl)
	movl	VAL(%esp), %eax		/* %eax = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	outl	(%dx)
	ret
	SET_SIZE(outl)

#endif	/* __i386 */
#endif	/* __lint */
1379
#if defined(__lint)

/* ARGSUSED */
void
outw(int port_address, uint16_t val)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void outw(int port_address, uint16_t val)
	 *	%edi = port_address, %esi = val
	 *
	 * Write the 16-bit val to the I/O port in the low 16 bits of
	 * port_address.  The instruction is spelled as outl behind the
	 * D16 prefix macro.
	 */
	ENTRY(outw)
	movw	%si, %ax		/* %ax = val */
	movw	%di, %dx		/* %dx = port */
	D16 outl (%dx)		/* XX64 why not outw? */
	ret
	SET_SIZE(outw)

#elif defined(__i386)

	/*
	 * void outw(int port_address, uint16_t val)
	 *
	 * Same as above with both arguments fetched from the stack.
	 */
	ENTRY(outw)
	movw	VAL(%esp), %ax		/* %ax = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	D16 outl (%dx)
	ret
	SET_SIZE(outw)

#endif	/* __i386 */
#endif	/* __lint */
1409
#if defined(__lint)

/* ARGSUSED */
void
outb(int port_address, uint8_t val)
{}

#else	/* __lint */

/*
 * void outb(int port_address, uint8_t val)
 *
 * Write the byte `val' to I/O port `port_address'.
 */
#if defined(__amd64)

	ENTRY(outb)
	movb	%sil, %al		/* %al = val */
	movw	%di, %dx		/* %dx = port */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#elif defined(__i386)

	/* PORT and VAL are the argument offsets .set for outl. */
	ENTRY(outb)
	movb	VAL(%esp), %al		/* %al = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#endif	/* __i386 */
#endif	/* __lint */
1439
#if defined(__lint)

/* ARGSUSED */
uint32_t
inl(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint32_t inl(int port_address)
 *
 * Read a 32-bit value from I/O port `port_address' and return it in %eax.
 */
#if defined(__amd64)

	ENTRY(inl)
	movw	%di, %dx		/* %dx = port */
	xorl	%eax, %eax		/* start from a zeroed return register */
	inl	(%dx)
	ret
	SET_SIZE(inl)

#elif defined(__i386)

	/* PORT is the argument offset .set for outl. */
	ENTRY(inl)
	movw	PORT(%esp), %dx		/* %dx = port */
	inl	(%dx)			/* %eax = value read */
	ret
	SET_SIZE(inl)

#endif	/* __i386 */
#endif	/* __lint */
1468
#if defined(__lint)

/* ARGSUSED */
uint16_t
inw(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint16_t inw(int port_address)
 *
 * Read a 16-bit value from I/O port `port_address'.  %eax is cleared
 * before the transfer so the bits above the value read are zero.
 */
#if defined(__amd64)

	ENTRY(inw)
	movw	%di, %dx		/* %dx = port */
	xorl	%eax, %eax
	D16 inl	(%dx)
	ret
	SET_SIZE(inw)

#elif defined(__i386)

	/* PORT is the argument offset .set for outl. */
	ENTRY(inw)
	movw	PORT(%esp), %dx		/* %dx = port */
	subl	%eax, %eax		/* %eax = 0 */
	D16 inl	(%dx)
	ret
	SET_SIZE(inw)

#endif	/* __i386 */
#endif	/* __lint */
1498
1499
#if defined(__lint)

/* ARGSUSED */
uint8_t
inb(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint8_t inb(int port_address)
 *
 * Read one byte from I/O port `port_address'.  %eax is cleared before
 * the transfer so the bits above %al are zero on return.
 */
#if defined(__amd64)

	ENTRY(inb)
	movw	%di, %dx		/* %dx = port */
	xorl	%eax, %eax
	inb	(%dx)
	ret
	SET_SIZE(inb)

#elif defined(__i386)

	/* PORT is the argument offset .set for outl. */
	ENTRY(inb)
	movw	PORT(%esp), %dx		/* %dx = port */
	subl	%eax, %eax		/* %eax = 0 */
	inb	(%dx)
	ret
	SET_SIZE(inb)

#endif	/* __i386 */
#endif	/* __lint */
1529
1530
#if defined(__lint)

/* ARGSUSED */
void
repoutsw(int port, uint16_t *addr, int cnt)
{}

#else	/* __lint */

/*
 * void repoutsw(int port, uint16_t *addr, int cnt)
 *
 * Write `cnt' 16-bit words, starting at `addr', to I/O port `port' with
 * a repeated string-output instruction ("rep; D16 outsl").
 */
#if defined(__amd64)

	ENTRY(repoutsw)
	movl	%edx, %ecx		/* %ecx = cnt; read before %dx is reused */
	movw	%di, %dx		/* %dx = port */
					/* addr is already in %rsi */
	rep
	  D16 outsl
	ret
	SET_SIZE(repoutsw)

#elif defined(__i386)

	/*
	 * Stack layout once the callee-saved index register has been
	 * pushed:
	 *
	 *      |  cnt        |  +16
	 *      | *addr       |  +12
	 *      | port        |  +8
	 *      |  eip        |  +4
	 *      |  saved reg  |  <-- %esp
	 *
	 * Pushing anything else requires adjusting the offsets below.
	 * These redefine PORT for the routines that follow.
	 */
	.set	PORT, 8
	.set	ADDR, 12
	.set	COUNT, 16

	ENTRY(repoutsw)
	pushl	%esi			/* %esi is callee-saved */
	movl	COUNT(%esp), %ecx	/* %ecx = cnt */
	movl	ADDR(%esp), %esi	/* %esi = source address */
	movl	PORT(%esp), %edx	/* %dx = port */
	rep
	  D16 outsl
	popl	%esi
	ret
	SET_SIZE(repoutsw)

#endif	/* __i386 */
#endif	/* __lint */
1580
1581
#if defined(__lint)

/* ARGSUSED */
void
repinsw(int port_addr, uint16_t *addr, int cnt)
{}

#else	/* __lint */

/*
 * void repinsw(int port_addr, uint16_t *addr, int cnt)
 *
 * Read `cnt' 16-bit words from I/O port `port_addr' into the buffer at
 * `addr' with a repeated string-input instruction ("rep; D16 insl").
 *
 * The string-input instructions store through %rdi/%edi, so the buffer
 * address has to be placed there before the transfer.
 */
#if defined(__amd64)

	ENTRY(repinsw)
	movl	%edx, %ecx		/* %ecx = cnt; read before %dx is reused */
	movw	%di, %dx		/* %dx = port; read before %rdi is reused */
	movq	%rsi, %rdi		/* %rdi = destination buffer (addr) */
	rep
	  D16 insl
	ret
	SET_SIZE(repinsw)

#elif defined(__i386)

	/*
	 * PORT, ADDR and COUNT are the argument offsets .set for repoutsw;
	 * they account for one pushed register.
	 */
	ENTRY(repinsw)
	pushl	%edi			/* %edi is callee-saved */
	movl	PORT(%esp), %edx	/* %dx = port */
	movl	ADDR(%esp), %edi	/* %edi = destination buffer */
	movl	COUNT(%esp), %ecx	/* %ecx = cnt */
	rep
	  D16 insl
	popl	%edi
	ret
	SET_SIZE(repinsw)

#endif	/* __i386 */
#endif	/* __lint */
1616
1617
#if defined(__lint)

/* ARGSUSED */
void
repinsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repinsb(int port, uint8_t *addr, int count)
 *
 * Read `count' bytes from I/O port `port' into the buffer at `addr'
 * using "rep; insb".
 */
#if defined(__amd64)

	ENTRY(repinsb)
	/* The order matters: each source register is reused just below. */
	movl	%edx, %ecx		/* %ecx = count */
	movw	%di, %dx		/* %dx = port */
	movq	%rsi, %rdi		/* %rdi = destination buffer */
	rep
	  insb
	ret
	SET_SIZE(repinsb)

#elif defined(__i386)

	/*
	 * Stack layout once one callee-saved register has been pushed
	 * (%edi in this routine):
	 *
	 *      |  cnt        |  +16
	 *      | *addr       |  +12
	 *      | port        |  +8
	 *      |  eip        |  +4
	 *      |  saved reg  |  <-- %esp
	 *
	 * Pushing anything else requires adjusting the offsets below.
	 */
	.set	IO_PORT, 8
	.set	IO_ADDR, 12
	.set	IO_COUNT, 16

	ENTRY(repinsb)
	pushl	%edi			/* %edi is callee-saved */
	movl	IO_PORT(%esp), %edx	/* %dx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %edi	/* %edi = destination buffer */
	rep
	  insb
	popl	%edi
	ret
	SET_SIZE(repinsb)

#endif	/* __i386 */
#endif	/* __lint */
1668
1669
/*
 * void repinsd(int port, uint32_t *addr, int count)
 *
 * Read a stream of 32-bit words from I/O port `port' into `addr'.
 * NOTE: `count' is a DWORD count, not a byte count.
 */
#if defined(__lint)

/* ARGSUSED */
void
repinsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(repinsd)
	/* The order matters: each source register is reused just below. */
	movl	%edx, %ecx		/* %ecx = count */
	movw	%di, %dx		/* %dx = port */
	movq	%rsi, %rdi		/* %rdi = destination buffer */
	rep
	  insl
	ret
	SET_SIZE(repinsd)

#elif defined(__i386)

	/* IO_PORT/IO_ADDR/IO_COUNT are the offsets .set for repinsb. */
	ENTRY(repinsd)
	pushl	%edi			/* %edi is callee-saved */
	movl	IO_PORT(%esp), %edx	/* %dx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %edi	/* %edi = destination buffer */
	rep
	  insl
	popl	%edi
	ret
	SET_SIZE(repinsd)

#endif	/* __i386 */
#endif	/* __lint */
1709
/*
 * void repoutsb(int port, uint8_t *addr, int count)
 *
 * Write a stream of bytes from `addr' to I/O port `port'.
 * NOTE: `count' is a byte count.
 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(repoutsb)
	movl	%edx, %ecx		/* %ecx = count; read before %dx is reused */
	movw	%di, %dx		/* %dx = port */
					/* addr is already in %rsi */
	rep
	  outsb
	ret
	SET_SIZE(repoutsb)

#elif defined(__i386)

	/* IO_PORT/IO_ADDR/IO_COUNT are the offsets .set for repinsb. */
	ENTRY(repoutsb)
	pushl	%esi			/* %esi is callee-saved */
	movl	IO_PORT(%esp), %edx	/* %dx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %esi	/* %esi = source buffer */
	rep
	  outsb
	popl	%esi
	ret
	SET_SIZE(repoutsb)

#endif	/* __i386 */
#endif	/* __lint */
1748
/*
 * void repoutsd(int port, uint32_t *addr, int count)
 *
 * Write a stream of 32-bit words from `addr' to I/O port `port'.
 * NOTE: `count' is a DWORD count, not a byte count.
 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(repoutsd)
	movl	%edx, %ecx		/* %ecx = count; read before %dx is reused */
	movw	%di, %dx		/* %dx = port */
					/* addr is already in %rsi */
	rep
	  outsl
	ret
	SET_SIZE(repoutsd)

#elif defined(__i386)

	/* IO_PORT/IO_ADDR/IO_COUNT are the offsets .set for repinsb. */
	ENTRY(repoutsd)
	pushl	%esi			/* %esi is callee-saved */
	movl	IO_PORT(%esp), %edx	/* %dx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %esi	/* %esi = source buffer */
	rep
	  outsl
	popl	%esi
	ret
	SET_SIZE(repoutsd)

#endif	/* __i386 */
#endif	/* __lint */
1787
/*
 * void int3(void)
 * void int18(void)
 * void int20(void)
 *
 * C-callable wrappers around the software-interrupt instruction:
 *   int3  raises vector T_BPTFLT,
 *   int18 raises vector T_MCE,
 *   int20 raises vector T_DBGENTR, but only when RB_DEBUG is set in
 *         boothowto; otherwise it simply returns.
 */

#if defined(__lint)

void
int3(void)
{}

void
int18(void)
{}

void
int20(void)
{}

#else	/* __lint */

	ENTRY(int3)
	int	$T_BPTFLT
	ret
	SET_SIZE(int3)

	ENTRY(int18)
	int	$T_MCE
	ret
	SET_SIZE(int18)

	ENTRY(int20)
	movl	boothowto, %eax
	andl	$RB_DEBUG, %eax		/* RB_DEBUG clear? */
	jz	.int20_done		/*   yes: nothing to do */

	int	$T_DBGENTR
.int20_done:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(int20)

#endif	/* __lint */
1832
#if defined(__lint)

/* ARGSUSED */
int
scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
{ return (0); }

#else	/* __lint */

/*
 * int scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
 *
 * Walk the `size' bytes at `cp' and stop at the first byte b for which
 * (table[b] & mask) != 0.  The return value is the distance from the
 * stopping position to the end of the buffer: `size' if the very first
 * byte matches, 0 if no byte matches.
 */
#if defined(__amd64)

	ENTRY(scanc)
					/* %rdi = size */
					/* %rsi = cp */
					/* %rdx = table */
					/* %cl  = mask */
	addq	%rsi, %rdi		/* %rdi = end = cp + size */
.scanc_next:
	cmpq	%rdi, %rsi		/* reached the end? */
	jnb	.scanc_out
	movzbq	(%rsi), %r8		/* %r8 = *cp */
	incq	%rsi			/* cp++ */
	testb	%cl, (%r8, %rdx)	/* table[*cp] & mask */
	jz	.scanc_next		/* no bits in common: keep going */
	decq	%rsi			/* step back onto the matching byte */
.scanc_out:
	movl	%edi, %eax
	subl	%esi, %eax		/* return (end - cp) */
	ret
	SET_SIZE(scanc)

#elif defined(__i386)

	/*
	 * With two registers pushed the arguments sit at:
	 *   12(%esp) size, 16(%esp) cp, 20(%esp) table, 24(%esp) mask
	 */
	ENTRY(scanc)
	pushl	%edi
	pushl	%esi
	movl	16(%esp), %esi		/* %esi = cp */
	movl	12(%esp), %edi		/* %edi = size */
	addl	%esi, %edi		/* %edi = end = cp + size */
	movl	20(%esp), %edx		/* %edx = table */
	movb	24(%esp), %cl		/* %cl = mask */
.scanc_next:
	cmpl	%edi, %esi		/* reached the end? */
	jnb	.scanc_out
	movzbl	(%esi),  %eax		/* %eax = *cp */
	incl	%esi			/* cp++ */
	movb	(%edx,  %eax), %al	/* %al = table[*cp] */
	testb	%al, %cl
	jz	.scanc_next		/* no bits in common: keep going */
	dec	%esi			/* step back onto the matching byte */
.scanc_out:
	movl	%edi, %eax
	subl	%esi, %eax		/* return (end - cp) */
	popl	%esi
	popl	%edi
	ret
	SET_SIZE(scanc)

#endif	/* __i386 */
#endif	/* __lint */
1893
1894/*
1895 * Replacement functions for ones that are normally inlined.
1896 * In addition to the copy in i86.il, they are defined here just in case.
1897 */
1898
1899#if defined(__lint)
1900
1901ulong_t
1902intr_clear(void)
1903{ return (0); }
1904
1905ulong_t
1906clear_int_flag(void)
1907{ return (0); }
1908
1909#else	/* __lint */
1910
/*
 * ulong_t intr_clear(void)
 * ulong_t clear_int_flag(void)
 *
 * Two entry points onto the same code.  The flags register is captured
 * (pushf/pop) as the return value and interrupts are then disabled
 * through the CLI() macro.
 *
 * Under __xpv, unless xpv_panicking is non-zero, CLIRET() is used
 * instead and the PS_IE bit of the returned value is synthesized from
 * the event mask bit that CLIRET() hands back: PS_IE is cleared when
 * the mask bit is set and set when it is clear.  When xpv_panicking is
 * non-zero the code falls back to the plain CLI() path.
 *
 * CLI() and CLIRET() are defined in the included headers; their register
 * arguments look like scratch/result registers -- NOTE(review): confirm
 * against the macro definitions.
 */
1911#if defined(__amd64)
1912
1913	ENTRY(intr_clear)
1914	ENTRY(clear_int_flag)
1915	pushfq
1916	popq	%rax
1917#if defined(__xpv)
1918	leaq	xpv_panicking, %rdi
1919	movl	(%rdi), %edi
1920	cmpl	$0, %edi
1921	jne	2f
1922	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
1923	/*
1924	 * Synthesize the PS_IE bit from the event mask bit
1925	 */
1926	andq    $_BITNOT(PS_IE), %rax
1927	testb	$1, %dl
1928	jnz	1f
1929	orq	$PS_IE, %rax
19301:
1931	ret
19322:
1933#endif
1934	CLI(%rdi)
1935	ret
1936	SET_SIZE(clear_int_flag)
1937	SET_SIZE(intr_clear)
1938
1939#elif defined(__i386)
1940
1941	ENTRY(intr_clear)
1942	ENTRY(clear_int_flag)
1943	pushfl
1944	popl	%eax
1945#if defined(__xpv)
1946	leal	xpv_panicking, %edx
1947	movl	(%edx), %edx
1948	cmpl	$0, %edx
1949	jne	2f
1950	CLIRET(%edx, %cl)	/* returns event mask in %cl */
1951	/*
1952	 * Synthesize the PS_IE bit from the event mask bit
1953	 */
1954	andl    $_BITNOT(PS_IE), %eax
1955	testb	$1, %cl
1956	jnz	1f
1957	orl	$PS_IE, %eax
19581:
1959	ret
19602:
1961#endif
1962	CLI(%edx)
1963	ret
1964	SET_SIZE(clear_int_flag)
1965	SET_SIZE(intr_clear)
1966
1967#endif	/* __i386 */
1968#endif	/* __lint */
1969
#if defined(__lint)

struct cpu *
curcpup(void)
{ return 0; }

#else	/* __lint */

/*
 * struct cpu *curcpup(void)
 *
 * Return the pointer stored at offset CPU_SELF of the %gs segment.
 */
#if defined(__amd64)

	ENTRY(curcpup)
	movq	%gs:CPU_SELF, %rax	/* return value in %rax */
	ret
	SET_SIZE(curcpup)

#elif defined(__i386)

	ENTRY(curcpup)
	movl	%gs:CPU_SELF, %eax	/* return value in %eax */
	ret
	SET_SIZE(curcpup)

#endif	/* __i386 */
#endif	/* __lint */
1994
#if defined(__lint)

/* ARGSUSED */
uint32_t
htonl(uint32_t i)
{ return (0); }

/* ARGSUSED */
uint32_t
ntohl(uint32_t i)
{ return (0); }

#else	/* __lint */

/*
 * uint32_t htonl(uint32_t i)
 * uint32_t ntohl(uint32_t i)
 *
 * Return `i' with its four bytes reversed.  Both names enter the same
 * code (ntohl is an ALTENTRY of htonl).
 */
#if defined(__amd64)

	/* XX64 there must be shorter sequences for this */
	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	%edi, %eax		/* %eax = i */
	bswap	%eax			/* reverse the byte order */
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

#elif defined(__i386)

	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	4(%esp), %eax		/* %eax = i */
	bswap	%eax			/* reverse the byte order */
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

#endif	/* __i386 */
#endif	/* __lint */
2032
#if defined(__lint)

/* ARGSUSED */
uint16_t
htons(uint16_t i)
{ return (0); }

/* ARGSUSED */
uint16_t
ntohs(uint16_t i)
{ return (0); }


#else	/* __lint */

/*
 * uint16_t htons(uint16_t i)
 * uint16_t ntohs(uint16_t i)
 *
 * Return `i' with its two bytes exchanged.  The whole 32-bit register is
 * byte-reversed, which leaves the swapped halfword in the upper 16 bits;
 * the shift then brings it down and zero-fills the upper half.
 * Both names enter the same code (ntohs is an ALTENTRY of htons).
 */
#if defined(__amd64)

	/* XX64 there must be better sequences for this */
	ENTRY(htons)
	ALTENTRY(ntohs)
	movl	%edi, %eax		/* %eax = i */
	bswap	%eax			/* swapped halfword now in bits 31..16 */
	shrl	$16, %eax		/* move it down to bits 15..0 */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#elif defined(__i386)

	ENTRY(htons)
	ALTENTRY(ntohs)
	movl	4(%esp), %eax		/* %eax = i */
	bswap	%eax			/* swapped halfword now in bits 31..16 */
	shrl	$16, %eax		/* move it down to bits 15..0 */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#endif	/* __i386 */
#endif	/* __lint */
2073
2074
2075#if defined(__lint)
2076
2077/* ARGSUSED */
2078void
2079intr_restore(ulong_t i)
2080{ return; }
2081
2082/* ARGSUSED */
2083void
2084restore_int_flag(ulong_t i)
2085{ return; }
2086
2087#else	/* __lint */
2088
/*
 * void intr_restore(ulong_t i)
 * void restore_int_flag(ulong_t i)
 *
 * Two entry points onto the same code.  `i' is a saved flags value.
 * If PS_IE is clear in `i' the routine returns without doing anything;
 * interrupts are never disabled here.  If PS_IE is set, interrupts are
 * re-enabled: with "sti" natively, or through IE_TO_EVENT_MASK() under
 * __xpv.  Under __xpv nothing is done when xpv_panicking is non-zero.
 */
2089#if defined(__amd64)
2090
2091	ENTRY(intr_restore)
2092	ENTRY(restore_int_flag)
2093	testq	$PS_IE, %rdi
2094	jz	1f
2095#if defined(__xpv)
2096	leaq	xpv_panicking, %rsi
2097	movl	(%rsi), %esi
2098	cmpl	$0, %esi
2099	jne	1f
2100	/*
2101	 * Since we're -really- running unprivileged, our attempt
2102	 * to change the state of the IF bit will be ignored.
2103	 * The virtual IF bit is tweaked by CLI and STI.
2104	 */
2105	IE_TO_EVENT_MASK(%rsi, %rdi)
2106#else
2107	sti
2108#endif
21091:
2110	ret
2111	SET_SIZE(restore_int_flag)
2112	SET_SIZE(intr_restore)
2113
2114#elif defined(__i386)
2115
2116	ENTRY(intr_restore)
2117	ENTRY(restore_int_flag)
2118	testl	$PS_IE, 4(%esp)
2119	jz	1f
2120#if defined(__xpv)
2121	leal	xpv_panicking, %edx
2122	movl	(%edx), %edx
2123	cmpl	$0, %edx
2124	jne	1f
2125	/*
2126	 * Since we're -really- running unprivileged, our attempt
2127	 * to change the state of the IF bit will be ignored.
2128	 * The virtual IF bit is tweaked by CLI and STI.
2129	 */
2130	IE_TO_EVENT_MASK(%edx, 4(%esp))
2131#else
2132	sti
2133#endif
21341:
2135	ret
2136	SET_SIZE(restore_int_flag)
2137	SET_SIZE(intr_restore)
2138
2139#endif	/* __i386 */
2140#endif	/* __lint */
2141
#if defined(__lint)

void
sti(void)
{}

void
cli(void)
{}

#else	/* __lint */

/*
 * void sti(void)
 * void cli(void)
 *
 * C-callable wrappers around the STI and CLI() macros from the included
 * headers.  CLI() takes a register argument (%rax / %eax here), which is
 * presumably scratch -- NOTE(review): confirm against the macro.
 */
	ENTRY(sti)
	STI
	ret
	SET_SIZE(sti)

	ENTRY(cli)
#if defined(__amd64)
	CLI(%rax)
#elif defined(__i386)
	CLI(%eax)
#endif	/* __i386 */
	ret
	SET_SIZE(cli)

#endif	/* __lint */
2169
2170#if defined(__lint)
2171
2172dtrace_icookie_t
2173dtrace_interrupt_disable(void)
2174{ return (0); }
2175
2176#else   /* __lint */
2177
/*
 * dtrace_icookie_t dtrace_interrupt_disable(void)
 *
 * Capture the flags register (pushf/pop) as the return value, then
 * disable interrupts through CLI().
 *
 * Under __xpv, CLIRET() is used instead and the PS_IE bit of the
 * returned value is synthesized from the event mask bit: cleared when
 * the mask bit is set, set when it is clear.  If xpv_panicking is
 * non-zero the routine returns the captured flags without invoking
 * either macro.
 */
2178#if defined(__amd64)
2179
2180	ENTRY(dtrace_interrupt_disable)
2181	pushfq
2182	popq	%rax
2183#if defined(__xpv)
2184	leaq	xpv_panicking, %rdi
2185	movl	(%rdi), %edi
2186	cmpl	$0, %edi
2187	jne	1f
2188	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
2189	/*
2190	 * Synthesize the PS_IE bit from the event mask bit
2191	 */
2192	andq    $_BITNOT(PS_IE), %rax
2193	testb	$1, %dl
2194	jnz	1f
2195	orq	$PS_IE, %rax
21961:
2197#else
2198	CLI(%rdx)
2199#endif
2200	ret
2201	SET_SIZE(dtrace_interrupt_disable)
2202
2203#elif defined(__i386)
2204
2205	ENTRY(dtrace_interrupt_disable)
2206	pushfl
2207	popl	%eax
2208#if defined(__xpv)
2209	leal	xpv_panicking, %edx
2210	movl	(%edx), %edx
2211	cmpl	$0, %edx
2212	jne	1f
2213	CLIRET(%edx, %cl)	/* returns event mask in %cl */
2214	/*
2215	 * Synthesize the PS_IE bit from the event mask bit
2216	 */
2217	andl    $_BITNOT(PS_IE), %eax
2218	testb	$1, %cl
2219	jnz	1f
2220	orl	$PS_IE, %eax
22211:
2222#else
2223	CLI(%edx)
2224#endif
2225	ret
2226	SET_SIZE(dtrace_interrupt_disable)
2227
2228#endif	/* __i386 */
2229#endif	/* __lint */
2230
#if defined(__lint)

/*ARGSUSED*/
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
{}

#else	/* __lint */

/*
 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
 *
 * Reload the flags register from `cookie' (push/popf).
 *
 * Under __xpv the popf cannot change the real IF bit, so unless
 * xpv_panicking is non-zero the virtual interrupt state is updated from
 * the cookie through IE_TO_EVENT_MASK().  The panicking case branches to
 * a named exit label that lives inside this function, so the branch
 * cannot bind to a numeric label belonging to some later routine.
 */
#if defined(__amd64)

	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi
	popfq
#if defined(__xpv)
	leaq	xpv_panicking, %rdx
	movl	(%rdx), %edx
	cmpl	$0, %edx
	jne	.dtrace_interrupt_enable_done
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored. The
	 * virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%rdx, %rdi)
.dtrace_interrupt_enable_done:
#endif
	ret
	SET_SIZE(dtrace_interrupt_enable)

#elif defined(__i386)

	ENTRY(dtrace_interrupt_enable)
	movl	4(%esp), %eax		/* %eax = cookie */
	pushl	%eax
	popfl
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	cmpl	$0, %edx
	jne	.dtrace_interrupt_enable_done
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored. The
	 * virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%edx, %eax)
.dtrace_interrupt_enable_done:
#endif
	ret
	SET_SIZE(dtrace_interrupt_enable)

#endif	/* __i386 */
#endif	/* __lint */
2283
2284
#if defined(__lint)

void
dtrace_membar_producer(void)
{}

void
dtrace_membar_consumer(void)
{}

#else	/* __lint */

/*
 * void dtrace_membar_producer(void)
 * void dtrace_membar_consumer(void)
 *
 * Both routines consist solely of a return; no fence instruction is
 * issued.  NOTE(review): presumably the ordering guarantees of this
 * architecture make an explicit barrier unnecessary here -- confirm.
 */
	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_producer)

	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_consumer)

#endif	/* __lint */
2308
#if defined(__lint)

kthread_id_t
threadp(void)
{ return ((kthread_id_t)0); }

#else	/* __lint */

/*
 * kthread_id_t threadp(void)
 *
 * Return the pointer stored at offset CPU_THREAD of the %gs segment.
 */
#if defined(__amd64)

	ENTRY(threadp)
	movq	%gs:CPU_THREAD, %rax	/* return value in %rax */
	ret
	SET_SIZE(threadp)

#elif defined(__i386)

	ENTRY(threadp)
	movl	%gs:CPU_THREAD, %eax	/* return value in %eax */
	ret
	SET_SIZE(threadp)

#endif	/* __i386 */
#endif	/* __lint */
2333
2334/*
2335 *   Checksum routine for Internet Protocol Headers
2336 */
2337
2338#if defined(__lint)
2339
/*
 * C rendition used for lint: add up `halfword_count' 16-bit words
 * starting at `address', fold the sum to 16 bits, add the caller's
 * partial checksum `sum' and fold again.
 */
2340/* ARGSUSED */
2341unsigned int
2342ip_ocsum(
2343	ushort_t *address,	/* ptr to 1st message buffer */
2344	int halfword_count,	/* length of data */
2345	unsigned int sum)	/* partial checksum */
2346{
2347	int		i;
2348	unsigned int	psum = 0;	/* partial sum */
2349
2350	for (i = 0; i < halfword_count; i++, address++) {
2351		psum += *address;
2352	}
2353
2354	while ((psum >> 16) != 0) {
2355		psum = (psum & 0xffff) + (psum >> 16);
2356	}
2357
2358	psum += sum;
2359
2360	while ((psum >> 16) != 0) {
2361		psum = (psum & 0xffff) + (psum >> 16);
2362	}
2363
2364	return (psum);
2365}
2366
2367#else	/* __lint */
2368
/*
 * unsigned int ip_ocsum(ushort_t *address, int halfword_count,
 *     unsigned int sum)
 *
 * Register use in both assembly versions:
 *   %rsi/%esi  current data pointer
 *   %ecx       halfwords still to be summed
 *   %edx       accumulator, seeded with the caller's partial sum
 *   %eax       second accumulator, seeded with 0
 *
 * The main loop consumes 32 halfwords (64 bytes) per pass as sixteen
 * 32-bit adds that alternate between the two accumulators and are linked
 * by the carry flag (add, then adc ...).  Nothing that alters the carry
 * flag may be inserted between the .onlyNN labels.
 *
 * A tail of fewer than 32 halfwords is handled by moving the data
 * pointer back so that the tail lines up with the END of the unrolled
 * sequence and then jumping into the sequence through a table indexed by
 * the number of 32-bit words left; label .onlyNN is the entry point for
 * NN remaining bytes.  An odd trailing halfword is added separately
 * first.  A start address that is not 4-byte aligned has its first
 * halfword added separately before the main loop is entered.
 *
 * At .ip_ocsum_done the two accumulators are combined and the 32-bit
 * total is folded to 16 bits with end-around carry.
 */
2369#if defined(__amd64)
2370
2371	ENTRY(ip_ocsum)
2372	pushq	%rbp
2373	movq	%rsp, %rbp
2374#ifdef DEBUG
	/* DEBUG kernels panic if `address' is below postbootkernelbase. */
2375	movq	postbootkernelbase(%rip), %rax
2376	cmpq	%rax, %rdi
2377	jnb	1f
2378	xorl	%eax, %eax
2379	movq	%rdi, %rsi
2380	leaq	.ip_ocsum_panic_msg(%rip), %rdi
2381	call	panic
2382	/*NOTREACHED*/
2383.ip_ocsum_panic_msg:
2384	.string	"ip_ocsum: address 0x%p below kernelbase\n"
23851:
2386#endif
2387	movl	%esi, %ecx	/* halfword_count */
2388	movq	%rdi, %rsi	/* address */
2389				/* partial sum in %edx */
2390	xorl	%eax, %eax
2391	testl	%ecx, %ecx
2392	jz	.ip_ocsum_done
2393	testq	$3, %rsi
2394	jnz	.ip_csum_notaligned
2395.ip_csum_aligned:	/* XX64 opportunities for 8-byte operations? */
2396.next_iter:
2397	/* XX64 opportunities for prefetch? */
2398	/* XX64 compute csum with 64 bit quantities? */
2399	subl	$32, %ecx
2400	jl	.less_than_32
2401
2402	addl	0(%rsi), %edx
2403.only60:
2404	adcl	4(%rsi), %eax
2405.only56:
2406	adcl	8(%rsi), %edx
2407.only52:
2408	adcl	12(%rsi), %eax
2409.only48:
2410	adcl	16(%rsi), %edx
2411.only44:
2412	adcl	20(%rsi), %eax
2413.only40:
2414	adcl	24(%rsi), %edx
2415.only36:
2416	adcl	28(%rsi), %eax
2417.only32:
2418	adcl	32(%rsi), %edx
2419.only28:
2420	adcl	36(%rsi), %eax
2421.only24:
2422	adcl	40(%rsi), %edx
2423.only20:
2424	adcl	44(%rsi), %eax
2425.only16:
2426	adcl	48(%rsi), %edx
2427.only12:
2428	adcl	52(%rsi), %eax
2429.only8:
2430	adcl	56(%rsi), %edx
2431.only4:
2432	adcl	60(%rsi), %eax	/* could be adding -1 and -1 with a carry */
2433.only0:
2434	adcl	$0, %eax	/* could be adding -1 in eax with a carry */
2435	adcl	$0, %eax
2436
2437	addq	$64, %rsi
2438	testl	%ecx, %ecx
2439	jnz	.next_iter
2440
2441.ip_ocsum_done:
2442	addl	%eax, %edx
2443	adcl	$0, %edx
2444	movl	%edx, %eax	/* form a 16 bit checksum by */
2445	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
2446	addw	%dx, %ax
2447	adcw	$0, %ax
2448	andl	$0xffff, %eax
2449	leave
2450	ret
2451
2452.ip_csum_notaligned:
	/* Add the leading halfword on its own, then rejoin the main loop. */
2453	xorl	%edi, %edi
2454	movw	(%rsi), %di
2455	addl	%edi, %edx
2456	adcl	$0, %edx
2457	addq	$2, %rsi
2458	decl	%ecx
2459	jmp	.ip_csum_aligned
2460
2461.less_than_32:
	/* Undo the subtraction: %ecx = halfwords left (fewer than 32). */
2462	addl	$32, %ecx
2463	testl	$1, %ecx
2464	jz	.size_aligned
	/* Odd count: add the last halfword now and make the count even. */
2465	andl	$0xfe, %ecx
2466	movzwl	(%rsi, %rcx, 2), %edi
2467	addl	%edi, %edx
2468	adcl	$0, %edx
2469.size_aligned:
	/*
	 * %ecx becomes the number of 32-bit words left (the table index);
	 * %rsi is moved by (bytes left - 64) so the tail lines up with the
	 * end of the unrolled sequence.  %ecx is then zeroed so the loop
	 * exits after this pass, and the carry is cleared before entering
	 * the adc chain part-way through.
	 */
2470	movl	%ecx, %edi
2471	shrl	$1, %ecx
2472	shl	$1, %edi
2473	subq	$64, %rdi
2474	addq	%rdi, %rsi
2475	leaq    .ip_ocsum_jmptbl(%rip), %rdi
2476	leaq	(%rdi, %rcx, 8), %rdi
2477	xorl	%ecx, %ecx
2478	clc
2479	jmp 	*(%rdi)
2480
2481	.align	8
2482.ip_ocsum_jmptbl:
2483	.quad	.only0, .only4, .only8, .only12, .only16, .only20
2484	.quad	.only24, .only28, .only32, .only36, .only40, .only44
2485	.quad	.only48, .only52, .only56, .only60
2486	SET_SIZE(ip_ocsum)
2487
2488#elif defined(__i386)
2489
2490	ENTRY(ip_ocsum)
2491	pushl	%ebp
2492	movl	%esp, %ebp
2493	pushl	%ebx
2494	pushl	%esi
2495	pushl	%edi
2496	movl	12(%ebp), %ecx	/* count of half words */
2497	movl	16(%ebp), %edx	/* partial checksum */
2498	movl	8(%ebp), %esi
2499	xorl	%eax, %eax
2500	testl	%ecx, %ecx
2501	jz	.ip_ocsum_done
2502
2503	testl	$3, %esi
2504	jnz	.ip_csum_notaligned
2505.ip_csum_aligned:
2506.next_iter:
2507	subl	$32, %ecx
2508	jl	.less_than_32
2509
2510	addl	0(%esi), %edx
2511.only60:
2512	adcl	4(%esi), %eax
2513.only56:
2514	adcl	8(%esi), %edx
2515.only52:
2516	adcl	12(%esi), %eax
2517.only48:
2518	adcl	16(%esi), %edx
2519.only44:
2520	adcl	20(%esi), %eax
2521.only40:
2522	adcl	24(%esi), %edx
2523.only36:
2524	adcl	28(%esi), %eax
2525.only32:
2526	adcl	32(%esi), %edx
2527.only28:
2528	adcl	36(%esi), %eax
2529.only24:
2530	adcl	40(%esi), %edx
2531.only20:
2532	adcl	44(%esi), %eax
2533.only16:
2534	adcl	48(%esi), %edx
2535.only12:
2536	adcl	52(%esi), %eax
2537.only8:
2538	adcl	56(%esi), %edx
2539.only4:
2540	adcl	60(%esi), %eax	/* We could be adding -1 and -1 with a carry */
2541.only0:
2542	adcl	$0, %eax	/* we could be adding -1 in eax with a carry */
2543	adcl	$0, %eax
2544
2545	addl	$64, %esi
2546	andl	%ecx, %ecx
2547	jnz	.next_iter
2548
2549.ip_ocsum_done:
2550	addl	%eax, %edx
2551	adcl	$0, %edx
2552	movl	%edx, %eax	/* form a 16 bit checksum by */
2553	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
2554	addw	%dx, %ax
2555	adcw	$0, %ax
2556	andl	$0xffff, %eax
2557	popl	%edi		/* restore registers */
2558	popl	%esi
2559	popl	%ebx
2560	leave
2561	ret
2562
2563.ip_csum_notaligned:
	/* Add the leading halfword on its own, then rejoin the main loop. */
2564	xorl	%edi, %edi
2565	movw	(%esi), %di
2566	addl	%edi, %edx
2567	adcl	$0, %edx
2568	addl	$2, %esi
2569	decl	%ecx
2570	jmp	.ip_csum_aligned
2571
2572.less_than_32:
	/* Undo the subtraction: %ecx = halfwords left (fewer than 32). */
2573	addl	$32, %ecx
2574	testl	$1, %ecx
2575	jz	.size_aligned
	/* Odd count: add the last halfword now and make the count even. */
2576	andl	$0xfe, %ecx
2577	movzwl	(%esi, %ecx, 2), %edi
2578	addl	%edi, %edx
2579	adcl	$0, %edx
2580.size_aligned:
	/*
	 * Same tail set-up as the 64-bit version, with 4-byte table
	 * entries: bias %esi by (bytes left - 64), index the table by the
	 * number of 32-bit words left, zero %ecx, clear the carry.
	 */
2581	movl	%ecx, %edi
2582	shrl	$1, %ecx
2583	shl	$1, %edi
2584	subl	$64, %edi
2585	addl	%edi, %esi
2586	movl	$.ip_ocsum_jmptbl, %edi
2587	lea	(%edi, %ecx, 4), %edi
2588	xorl	%ecx, %ecx
2589	clc
2590	jmp 	*(%edi)
2591	SET_SIZE(ip_ocsum)
2592
2593	.data
2594	.align	4
2595
2596.ip_ocsum_jmptbl:
2597	.long	.only0, .only4, .only8, .only12, .only16, .only20
2598	.long	.only24, .only28, .only32, .only36, .only40, .only44
2599	.long	.only48, .only52, .only56, .only60
2600
2601
2602#endif	/* __i386 */
2603#endif	/* __lint */
2604
/*
 * unsigned long long mul32(uint_t a, uint_t b)
 *
 * Multiply two unsigned 32-bit numbers and return the full 64-bit
 * product; callable from C.  Provided to manipulate hrtime_t values.
 */
#if defined(__lint)

/* result = a * b; */

/* ARGSUSED */
unsigned long long
mul32(uint_t a, uint_t b)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	ENTRY(mul32)
	movl	%edi, %eax		/* %eax = a */
	xorl	%edx, %edx	/* XX64 joe, paranoia? */
	mull	%esi			/* %edx:%eax = a * b */
	shlq	$32, %rdx		/* move the high half into place ... */
	orq	%rdx, %rax		/* ... and merge it into %rax */
	ret
	SET_SIZE(mul32)

#elif defined(__i386)

	ENTRY(mul32)
	movl	4(%esp), %ecx		/* %ecx = a */
	movl	8(%esp), %eax		/* %eax = b */
	mull	%ecx			/* %edx:%eax = a * b */
	ret
	SET_SIZE(mul32)

#endif	/* __i386 */
#endif	/* __lint */
2642
#if defined(notused)
#if defined(__lint)
/* ARGSUSED */
void
load_pte64(uint64_t *pte, uint64_t pte_value)
{}
#else	/* __lint */
/*
 * void load_pte64(uint64_t *pte, uint64_t pte_value)
 *
 * Store the 64-bit `pte_value' to *pte as two 32-bit writes: the upper
 * half first, then the lower half.  Uses 32-bit stack argument offsets.
 * Compiled only when `notused' is defined.
 */
	.globl load_pte64
load_pte64:
	movl	12(%esp), %edx		/* %edx = upper half of pte_value */
	movl	8(%esp), %ecx		/* %ecx = lower half of pte_value */
	movl	4(%esp), %eax		/* %eax = pte */
	movl	%edx, 4(%eax)		/* upper half goes out first */
	movl	%ecx, (%eax)
	ret
#endif	/* __lint */
#endif	/* notused */
2660
#if defined(__lint)

/*ARGSUSED*/
void
scan_memory(caddr_t addr, size_t size)
{}

#else	/* __lint */

/*
 * void scan_memory(caddr_t addr, size_t size)
 *
 * Read through the `size' bytes at `addr' with a repeated string load,
 * discarding the data.  The byte count is rounded down to whole machine
 * words (8 bytes on amd64, 4 on i386); nothing is read when fewer bytes
 * than one word are requested.
 */
#if defined(__amd64)

	ENTRY(scan_memory)
	shrq	$3, %rsi	/* %rsi = number of quadwords */
	jz	.scan_memory_done
	movq	%rsi, %rcx	/* %rcx = repeat count */
	movq	%rdi, %rsi	/* %rsi = address to read from */
	rep lodsq		/* read the whole range */
.scan_memory_done:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(scan_memory)

#elif defined(__i386)

	/* With two registers pushed: 12(%esp) = addr, 16(%esp) = size. */
	ENTRY(scan_memory)
	pushl	%ecx
	pushl	%esi
	movl	16(%esp), %ecx	/* %ecx = size in bytes */
	shrl	$2, %ecx	/* %ecx = number of 32-bit words */
	jz	.scan_memory_done
	movl	12(%esp), %esi	/* %esi = address to read from */
	.byte	0xf3		/* rep prefix.  lame assembler.  sigh. */
	lodsl
.scan_memory_done:
	popl	%esi
	popl	%ecx
	ret
	SET_SIZE(scan_memory)

#endif	/* __i386 */
#endif	/* __lint */
2702
2703
#if defined(__lint)

/*ARGSUSED */
int
lowbit(ulong_t i)
{ return (0); }

#else	/* __lint */

/*
 * int lowbit(ulong_t i)
 *
 * Return the 1-based position of the least significant set bit of `i'
 * (bit 0 yields 1).
 *
 * The result register is preloaded with -1 before the bit scan.
 * NOTE(review): the apparent intent is that a zero argument leaves the
 * -1 in place so the final increment returns 0; that depends on the bit
 * scan not modifying its destination for a zero source -- confirm for
 * the processors supported.
 */
#if defined(__amd64)

	ENTRY(lowbit)
	movl	$-1, %eax		/* value kept if no bit is found */
	bsfq	%rdi, %rax		/* %rax = index of lowest set bit */
	incl	%eax			/* convert to 1-based */
	ret
	SET_SIZE(lowbit)

#elif defined(__i386)

	ENTRY(lowbit)
	movl	$-1, %eax		/* value kept if no bit is found */
	bsfl	4(%esp), %eax		/* %eax = index of lowest set bit */
	incl	%eax			/* convert to 1-based */
	ret
	SET_SIZE(lowbit)

#endif	/* __i386 */
#endif	/* __lint */
2733
#if defined(__lint)

/*ARGSUSED*/
int
highbit(ulong_t i)
{ return (0); }

#else	/* __lint */

/*
 * int highbit(ulong_t i)
 *
 * Return the 1-based position of the most significant set bit of `i'
 * (bit 0 yields 1).
 *
 * The result register is preloaded with -1 before the bit scan.
 * NOTE(review): the apparent intent is that a zero argument leaves the
 * -1 in place so the final increment returns 0; that depends on the bit
 * scan not modifying its destination for a zero source -- confirm for
 * the processors supported.
 */
#if defined(__amd64)

	ENTRY(highbit)
	movl	$-1, %eax		/* value kept if no bit is found */
	bsrq	%rdi, %rax		/* %rax = index of highest set bit */
	incl	%eax			/* convert to 1-based */
	ret
	SET_SIZE(highbit)

#elif defined(__i386)

	ENTRY(highbit)
	movl	$-1, %eax		/* value kept if no bit is found */
	bsrl	4(%esp), %eax		/* %eax = index of highest set bit */
	incl	%eax			/* convert to 1-based */
	ret
	SET_SIZE(highbit)

#endif	/* __i386 */
#endif	/* __lint */
2763
#if defined(__lint)

/*ARGSUSED*/
uint64_t
rdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
wrmsr(uint_t r, const uint64_t val)
{}

/*ARGSUSED*/
uint64_t
xrdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
xwrmsr(uint_t r, const uint64_t val)
{}

void
invalidate_cache(void)
{}

#else  /* __lint */

/*
 * uint64_t rdmsr(uint_t r)			read model-specific register r
 * void wrmsr(uint_t r, const uint64_t val)	write val to register r
 * uint64_t xrdmsr(uint_t r)			as rdmsr, with the access key
 * void xwrmsr(uint_t r, const uint64_t val)	as wrmsr, with the access key
 * void invalidate_cache(void)			execute wbinvd
 *
 * The instructions take the register number in %ecx and the value in
 * %edx:%eax.  The x-variants additionally load XMSR_ACCESS_VAL into %edi
 * before the access.  The 64-bit versions assemble or split the value
 * with shifts; the 32-bit versions use the %edx:%eax pair directly.
 */
#define	XMSR_ACCESS_VAL		$0x9c5a203a

#if defined(__amd64)

	ENTRY(rdmsr)
	movl	%edi, %ecx		/* %ecx = r */
	rdmsr
	shlq	$32, %rdx		/* combine %edx:%eax into %rax */
	orq	%rdx, %rax
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	%edi, %ecx		/* %ecx = r */
	movl	%esi, %eax		/* %eax = low half of val */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = high half of val */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	ENTRY(xrdmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* %ecx = r; read before %edi is reused */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr
	shlq	$32, %rdx		/* combine %edx:%eax into %rax */
	orq	%rdx, %rax
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* %ecx = r; read before %edi is reused */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	movl	%esi, %eax		/* %eax = low half of val */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = high half of val */
	wrmsr
	leave
	ret
	SET_SIZE(xwrmsr)

#elif defined(__i386)

	ENTRY(rdmsr)
	movl	4(%esp), %ecx		/* %ecx = r */
	rdmsr				/* result returned in %edx:%eax */
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	12(%esp), %edx		/* %edx = high half of val */
	movl	8(%esp), %eax		/* %eax = low half of val */
	movl	4(%esp), %ecx		/* %ecx = r */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	ENTRY(xrdmsr)
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%ebp), %ecx		/* %ecx = r */
	pushl	%edi			/* %edi is callee-saved */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr				/* result returned in %edx:%eax */
	popl	%edi
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushl	%ebp
	movl	%esp, %ebp
	movl	16(%ebp), %edx		/* %edx = high half of val */
	movl	12(%ebp), %eax		/* %eax = low half of val */
	movl	8(%ebp), %ecx		/* %ecx = r */
	pushl	%edi			/* %edi is callee-saved */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	wrmsr
	popl	%edi
	leave
	ret
	SET_SIZE(xwrmsr)

#endif	/* __i386 */

	ENTRY(invalidate_cache)
	wbinvd
	ret
	SET_SIZE(invalidate_cache)

#endif	/* __lint */
2888
2889#if defined(__lint)
2890
2891/*ARGSUSED*/
2892void
2893getcregs(struct cregs *crp)
2894{}
2895
2896#else	/* __lint */
2897
/*
 * void getcregs(struct cregs *crp)
 *
 * Snapshot control and descriptor-table registers into *crp, using the
 * CREG_* field offsets.
 *
 *   __xpv builds:  the structure is cleared with bzero() and only
 *                  %cr0, %cr2, %cr3 and %cr4 are recorded.
 *   native amd64:  GDT, IDT, LDT and task register, %cr0, %cr2, %cr3,
 *                  %cr4, %cr8, plus the MSRs MSR_AMD_KGSBASE and
 *                  MSR_AMD_EFER (read with the local GETMSR() macro).
 *   native i386:   GDT, IDT, LDT and task register, %cr0, %cr2, %cr3;
 *                  %cr4 is read only when X86_LARGEPAGE is set in
 *                  x86_feature, otherwise 0 is stored in its place.
 */
2898#if defined(__amd64)
2899
2900	ENTRY_NP(getcregs)
2901#if defined(__xpv)
2902	/*
2903	 * Only a few of the hardware control registers or descriptor tables
2904	 * are directly accessible to us, so just zero the structure.
2905	 *
2906	 * XXPV	Perhaps it would be helpful for the hypervisor to return
2907	 *	virtualized versions of these for post-mortem use.
2908	 *	(Need to reevaluate - perhaps it already does!)
2909	 */
2910	pushq	%rdi		/* save *crp */
2911	movq	$CREGSZ, %rsi
2912	call	bzero
2913	popq	%rdi
2914
2915	/*
2916	 * Dump what limited information we can
2917	 */
2918	movq	%cr0, %rax
2919	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
2920	movq	%cr2, %rax
2921	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
2922	movq	%cr3, %rax
2923	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
2924	movq	%cr4, %rax
2925	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
2926
2927#else	/* __xpv */
2928
/*
 * GETMSR(r, off, d): read MSR `r' and store the result, low word first,
 * at off(d) and off+4(d).  Uses %ecx, %eax and %edx.
 */
2929#define	GETMSR(r, off, d)	\
2930	movl	$r, %ecx;	\
2931	rdmsr;			\
2932	movl	%eax, off(d);	\
2933	movl	%edx, off+4(d)
2934
	/*
	 * Each zero store below comes before the register store that
	 * overlaps it, so the bytes the shorter store does not write
	 * are left as zero.
	 */
2935	xorl	%eax, %eax
2936	movq	%rax, CREG_GDT+8(%rdi)
2937	sgdt	CREG_GDT(%rdi)		/* 10 bytes */
2938	movq	%rax, CREG_IDT+8(%rdi)
2939	sidt	CREG_IDT(%rdi)		/* 10 bytes */
2940	movq	%rax, CREG_LDT(%rdi)
2941	sldt	CREG_LDT(%rdi)		/* 2 bytes */
2942	movq	%rax, CREG_TASKR(%rdi)
2943	str	CREG_TASKR(%rdi)	/* 2 bytes */
2944	movq	%cr0, %rax
2945	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
2946	movq	%cr2, %rax
2947	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
2948	movq	%cr3, %rax
2949	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
2950	movq	%cr4, %rax
2951	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
2952	movq	%cr8, %rax
2953	movq	%rax, CREG_CR8(%rdi)	/* cr8 */
2954	GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
2955	GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
2956#endif	/* __xpv */
2957	ret
2958	SET_SIZE(getcregs)
2959
2960#undef GETMSR
2961
2962#elif defined(__i386)
2963
2964	ENTRY_NP(getcregs)
2965#if defined(__xpv)
2966	/*
2967	 * Only a few of the hardware control registers or descriptor tables
2968	 * are directly accessible to us, so just zero the structure.
2969	 *
2970	 * XXPV	Perhaps it would be helpful for the hypervisor to return
2971	 *	virtualized versions of these for post-mortem use.
2972	 *	(Need to reevaluate - perhaps it already does!)
2973	 */
2974	movl	4(%esp), %edx
2975	pushl	$CREGSZ
2976	pushl	%edx
2977	call	bzero
2978	addl	$8, %esp
	/* reload crp from the stack now that the call has returned */
2979	movl	4(%esp), %edx
2980
2981	/*
2982	 * Dump what limited information we can
2983	 */
2984	movl	%cr0, %eax
2985	movl	%eax, CREG_CR0(%edx)	/* cr0 */
2986	movl	%cr2, %eax
2987	movl	%eax, CREG_CR2(%edx)	/* cr2 */
2988	movl	%cr3, %eax
2989	movl	%eax, CREG_CR3(%edx)	/* cr3 */
2990	movl	%cr4, %eax
2991	movl	%eax, CREG_CR4(%edx)	/* cr4 */
2992
2993#else	/* __xpv */
2994
2995	movl	4(%esp), %edx
	/* clear the 16-bit word at offset 6 of each table-register slot */
2996	movw	$0, CREG_GDT+6(%edx)
2997	movw	$0, CREG_IDT+6(%edx)
2998	sgdt	CREG_GDT(%edx)		/* gdt */
2999	sidt	CREG_IDT(%edx)		/* idt */
3000	sldt	CREG_LDT(%edx)		/* ldt */
3001	str	CREG_TASKR(%edx)	/* task */
3002	movl	%cr0, %eax
3003	movl	%eax, CREG_CR0(%edx)	/* cr0 */
3004	movl	%cr2, %eax
3005	movl	%eax, CREG_CR2(%edx)	/* cr2 */
3006	movl	%cr3, %eax
3007	movl	%eax, CREG_CR3(%edx)	/* cr3 */
	/* %cr4 is read only when X86_LARGEPAGE is set in x86_feature */
3008	testl	$X86_LARGEPAGE, x86_feature
3009	jz	.nocr4
3010	movl	%cr4, %eax
3011	movl	%eax, CREG_CR4(%edx)	/* cr4 */
3012	jmp	.skip
3013.nocr4:
3014	movl	$0, CREG_CR4(%edx)
3015.skip:
3016#endif
3017	ret
3018	SET_SIZE(getcregs)
3019
3020#endif	/* __i386 */
3021#endif	/* __lint */
3022
3023
3024/*
3025 * A panic trigger is a word which is updated atomically and can only be set
3026 * once.  We atomically store 0xDEFACEDD and load the old value.  If the
3027 * previous value was 0, we succeed and return 1; otherwise return 0.
3028 * This allows a partially corrupt trigger to still trigger correctly.  DTrace
3029 * has its own version of this function to allow it to panic correctly from
3030 * probe context.
3031 */
3032#if defined(__lint)
3033
/*
 * Lint-only stand-in for the assembly panic_trigger(); it never touches
 * the trigger word and always returns 0.
 */
/*ARGSUSED*/
int
panic_trigger(int *tp)
{
	return (0);
}
3038
/*
 * Lint-only stand-in for the assembly dtrace_panic_trigger(); it never
 * touches the trigger word and always returns 0.
 */
/*ARGSUSED*/
int
dtrace_panic_trigger(int *tp)
{
	return (0);
}
3043
3044#else	/* __lint */
3045
3046#if defined(__amd64)
3047
/*
 * int panic_trigger(int *tp) -- amd64 implementation.
 *
 * In:	%rdi = tp
 * Out:	%eax = 1 if *tp was 0 before the exchange, otherwise 0
 *
 * 0xdefacedd is atomically exchanged into *tp; the value that comes back
 * out decides the result.  Clobbers %ecx and the flags.
 */
	ENTRY_NP(panic_trigger)
	movl	$0xdefacedd, %ecx
	lock
	  xchgl	%ecx, (%rdi)		/* %ecx = previous trigger value */
	xorl	%eax, %eax
	testl	%ecx, %ecx
	sete	%al			/* %eax = (previous == 0) */
	ret
	SET_SIZE(panic_trigger)
3060
/*
 * int dtrace_panic_trigger(int *tp) -- amd64 implementation.
 *
 * DTrace's private copy of panic_trigger(): same contract, kept as a
 * separate routine that calls nothing else.
 *
 * In:	%rdi = tp
 * Out:	%eax = 1 if *tp was 0 before the exchange, otherwise 0
 */
	ENTRY_NP(dtrace_panic_trigger)
	movl	$0xdefacedd, %ecx
	lock
	  xchgl	%ecx, (%rdi)		/* %ecx = previous trigger value */
	xorl	%eax, %eax
	testl	%ecx, %ecx
	sete	%al			/* %eax = (previous == 0) */
	ret
	SET_SIZE(dtrace_panic_trigger)
3073
3074#elif defined(__i386)
3075
/*
 * int panic_trigger(int *tp) -- ia32 implementation.
 *
 * In:	4(%esp) = tp
 * Out:	%eax = 1 if *tp was 0 before the exchange, otherwise 0
 *
 * Clobbers %ecx, %edx and the flags.
 */
	ENTRY_NP(panic_trigger)
	movl	4(%esp), %ecx		/ %ecx = address of trigger
	movl	$0xdefacedd, %edx	/ %edx = value to store
	lock				/ assert lock
	xchgl	%edx, (%ecx)		/ %edx = previous trigger value
	xorl	%eax, %eax
	testl	%edx, %edx
	sete	%al			/ return (previous == 0)
	ret
	SET_SIZE(panic_trigger)
3088
/*
 * int dtrace_panic_trigger(int *tp) -- ia32 implementation.
 *
 * DTrace's private copy of panic_trigger(): same contract, kept as a
 * separate routine that calls nothing else.
 *
 * In:	4(%esp) = tp
 * Out:	%eax = 1 if *tp was 0 before the exchange, otherwise 0
 */
	ENTRY_NP(dtrace_panic_trigger)
	movl	4(%esp), %ecx		/ %ecx = address of trigger
	movl	$0xdefacedd, %edx	/ %edx = value to store
	lock				/ assert lock
	xchgl	%edx, (%ecx)		/ %edx = previous trigger value
	xorl	%eax, %eax
	testl	%edx, %edx
	sete	%al			/ return (previous == 0)
	ret
	SET_SIZE(dtrace_panic_trigger)
3101
3102#endif	/* __i386 */
3103#endif	/* __lint */
3104
3105/*
3106 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
3107 * into the panic code implemented in panicsys().  vpanic() is responsible
3108 * for passing through the format string and arguments, and constructing a
3109 * regs structure on the stack into which it saves the current register
3110 * values.  If we are not dying due to a fatal trap, these registers will
3111 * then be preserved in panicbuf as the current processor state.  Before
3112 * invoking panicsys(), vpanic() activates the first panic trigger (see
3113 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
3114 * DTrace takes a slightly different panic path if it must panic from probe
3115 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
3116 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
3117 * branches back into vpanic().
3118 */
3119#if defined(__lint)
3120
/*
 * Lint-only stand-in for the assembly vpanic(); does nothing.
 */
/*ARGSUSED*/
void
vpanic(const char *format, va_list alist)
{
}
3125
/*
 * Lint-only stand-in for the assembly dtrace_vpanic(); does nothing.
 */
/*ARGSUSED*/
void
dtrace_vpanic(const char *format, va_list alist)
{
}
3130
3131#else	/* __lint */
3132
3133#if defined(__amd64)
3134
/*
 * vpanic(format, alist) -- amd64 implementation.
 *
 * In:	%rdi = format, %rsi = alist
 *
 * The prologue pushes the registers listed below, so that afterwards
 * %rbx addresses this save area:
 *
 *	0x60	return %rip
 *	0x58	%rbp
 *	0x50	%rflags
 *	0x48	%r11
 *	0x40	%r10
 *	0x38	%rbx
 *	0x30	%rax
 *	0x28	%r9
 *	0x20	%r8
 *	0x18	%rcx
 *	0x10	%rdx
 *	0x08	%rsi	(alist)
 *	0x00	%rdi	(format)
 *
 * The code at vpanic_common is shared with dtrace_vpanic, which builds
 * the same save area and jumps in with its own trigger result in %eax.
 */
	ENTRY_NP(vpanic)

	pushq	%rbp
	movq	%rsp, %rbp
	pushfq
	pushq	%r11
	pushq	%r10
	pushq	%rbx
	pushq	%rax
	pushq	%r9
	pushq	%r8
	pushq	%rcx
	pushq	%rdx
	pushq	%rsi
	pushq	%rdi

	movq	%rsp, %rbx			/* %rbx -> save area */

	leaq	panic_quiesce(%rip), %rdi
	call	panic_trigger			/* %eax = trigger result */

vpanic_common:
	/*
	 * Entered with the trigger result in %eax and the save-area
	 * pointer in %rbx.  The result is parked in %r11d, leaving %rax
	 * free as scratch, until it is passed to panicsys() below.
	 */
	movl	%eax, %r11d
	testl	%r11d, %r11d
	jz	0f

	/*
	 * The trigger fired for us, so we are the first to panic:
	 * continue on the top of the reserved panic_stack.
	 */
	leaq	panic_stack(%rip), %rsp
	addq	$PANICSTKSIZE, %rsp
0:	subq	$REGSIZE, %rsp			/* allocate struct regs */

	/*
	 * Populate struct regs at (%rsp).  Registers that the prologue
	 * pushed are copied out of the save area.
	 */
	movq	0x0(%rbx), %rax
	movq	%rax, REGOFF_RDI(%rsp)
	movq	0x8(%rbx), %rax
	movq	%rax, REGOFF_RSI(%rsp)
	movq	0x10(%rbx), %rax
	movq	%rax, REGOFF_RDX(%rsp)
	movq	0x18(%rbx), %rax
	movq	%rax, REGOFF_RCX(%rsp)
	movq	0x20(%rbx), %rax
	movq	%rax, REGOFF_R8(%rsp)
	movq	0x28(%rbx), %rax
	movq	%rax, REGOFF_R9(%rsp)
	movq	0x30(%rbx), %rax
	movq	%rax, REGOFF_RAX(%rsp)
	movq	0x38(%rbx), %rax
	movq	%rax, REGOFF_RBX(%rsp)
	movq	0x40(%rbx), %rax
	movq	%rax, REGOFF_R10(%rsp)
	movq	0x48(%rbx), %rax
	movq	%rax, REGOFF_R11(%rsp)
	movq	0x50(%rbx), %rax
	movq	%rax, REGOFF_RFL(%rsp)
	movq	0x58(%rbx), %rax
	movq	%rax, REGOFF_RBP(%rsp)
	leaq	0x60(%rbx), %rax		/* %rsp as it was at entry */
	movq	%rax, REGOFF_RSP(%rsp)

	/*
	 * %r12-%r15 were not pushed; they are stored straight from the
	 * live registers.
	 */
	movq	%r12, REGOFF_R12(%rsp)
	movq	%r13, REGOFF_R13(%rsp)
	movq	%r14, REGOFF_R14(%rsp)
	movq	%r15, REGOFF_R15(%rsp)

	/*
	 * Segment selectors, each zero-extended to 64 bits.
	 */
	movw	%ds, %ax
	movzwq	%ax, %rax
	movq	%rax, REGOFF_DS(%rsp)
	movw	%es, %ax
	movzwq	%ax, %rax
	movq	%rax, REGOFF_ES(%rsp)
	movw	%fs, %ax
	movzwq	%ax, %rax
	movq	%rax, REGOFF_FS(%rsp)
	movw	%gs, %ax
	movzwq	%ax, %rax
	movq	%rax, REGOFF_GS(%rsp)
	movw	%cs, %ax
	movzwq	%ax, %rax
	movq	%rax, REGOFF_CS(%rsp)
	movw	%ss, %ax
	movzwq	%ax, %rax
	movq	%rax, REGOFF_SS(%rsp)

	/*
	 * No trap is involved: zero trap number and error code, and
	 * report vpanic itself as the instruction pointer.
	 */
	movq	$0, REGOFF_TRAPNO(%rsp)
	movq	$0, REGOFF_ERR(%rsp)
	leaq	vpanic(%rip), %rax
	movq	%rax, REGOFF_RIP(%rsp)

	/*
	 * panicsys(format, alist, rp, on_panic_stack)
	 */
	movq	REGOFF_RDI(%rsp), %rdi		/* format */
	movq	REGOFF_RSI(%rsp), %rsi		/* alist */
	movq	%rsp, %rdx			/* struct regs */
	movl	%r11d, %ecx			/* on_panic_stack */
	call	panicsys

	/*
	 * If panicsys() returns: drop struct regs and unwind the save
	 * area in the reverse of the order it was pushed.
	 */
	addq	$REGSIZE, %rsp
	popq	%rdi
	popq	%rsi
	popq	%rdx
	popq	%rcx
	popq	%r8
	popq	%r9
	popq	%rax
	popq	%rbx
	popq	%r10
	popq	%r11
	popfq
	leave
	ret
	SET_SIZE(vpanic)
3259
/*
 * dtrace_vpanic(format, alist) -- amd64 implementation.
 *
 * In:	%rdi = format, %rsi = alist
 *
 * Builds the same save area as vpanic (offsets from %rbx afterwards):
 *
 *	0x60 return %rip   0x58 %rbp   0x50 %rflags   0x48 %r11
 *	0x40 %r10          0x38 %rbx   0x30 %rax      0x28 %r9
 *	0x20 %r8           0x18 %rcx   0x10 %rdx
 *	0x08 %rsi (alist)  0x00 %rdi (format)
 *
 * then calls dtrace_panic_trigger() instead of panic_trigger() and
 * continues in vpanic's shared code with the result in %eax.
 */
	ENTRY_NP(dtrace_vpanic)

	pushq	%rbp
	movq	%rsp, %rbp
	pushfq
	pushq	%r11
	pushq	%r10
	pushq	%rbx
	pushq	%rax
	pushq	%r9
	pushq	%r8
	pushq	%rcx
	pushq	%rdx
	pushq	%rsi
	pushq	%rdi

	movq	%rsp, %rbx			/* %rbx -> save area */

	leaq	panic_quiesce(%rip), %rdi
	call	dtrace_panic_trigger		/* %eax = trigger result */
	jmp	vpanic_common

	SET_SIZE(dtrace_vpanic)
3283
3284#elif defined(__i386)
3285
/*
 * vpanic(format, alist) -- ia32 implementation.
 *
 * Once the frame is set up, 8(%ebp) = format and 12(%ebp) = alist.
 * The prologue builds this save area, addressed from %ebx:
 *
 *	20	return %eip
 *	16	%ebp
 *	12	%eax
 *	 8	%ebx
 *	 4	%ecx
 *	 0	%edx
 *
 * The code at vpanic_common is shared with dtrace_vpanic, which builds
 * the same save area and jumps in with its own trigger result in %eax.
 */
	ENTRY_NP(vpanic)

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	pushl	%edx

	movl	%esp, %ebx			/ %ebx -> save area

	pushl	$panic_quiesce			/ push &panic_quiesce
	call	panic_trigger			/ %eax = trigger result
	addl	$4, %esp			/ drop the argument

vpanic_common:
	testl	%eax, %eax			/ did the trigger fire for us?
	jz	0f				/ no: stay on this stack

	/*
	 * We are the first to initiate a panic, so continue on the top
	 * of the reserved panic_stack.
	 */
	lea	panic_stack, %esp
	addl	$PANICSTKSIZE, %esp

0:	subl	$REGSIZE, %esp			/ allocate struct regs

	/*
	 * Populate struct regs at (%esp).  %eax still carries the trigger
	 * result and is left alone; %ecx and %edx are scratch.
	 *
	 * Registers pushed by the prologue come from the save area.
	 */
	movl	0(%ebx), %ecx
	movl	%ecx, REGOFF_EDX(%esp)
	movl	4(%ebx), %ecx
	movl	%ecx, REGOFF_ECX(%esp)
	movl	8(%ebx), %ecx
	movl	%ecx, REGOFF_EBX(%esp)
	movl	12(%ebx), %ecx
	movl	%ecx, REGOFF_EAX(%esp)
	movl	16(%ebx), %ecx
	movl	%ecx, REGOFF_EBP(%esp)
	movl	%ebx, %ecx
	addl	$20, %ecx			/ %esp as it was at entry
	movl	%ecx, REGOFF_ESP(%esp)

	/*
	 * %edi and %esi were not pushed; store them from the live
	 * registers.
	 */
	movl	%edi, REGOFF_EDI(%esp)
	movl	%esi, REGOFF_ESI(%esp)

	/*
	 * Data segment selectors.
	 */
#if !defined(__GNUC_AS__)
	movw	%gs, %edx
	movl	%edx, REGOFF_GS(%esp)
	movw	%fs, %edx
	movl	%edx, REGOFF_FS(%esp)
	movw	%es, %edx
	movl	%edx, REGOFF_ES(%esp)
	movw	%ds, %edx
	movl	%edx, REGOFF_DS(%esp)
#else	/* __GNUC_AS__ */
	mov	%gs, %edx
	mov	%edx, REGOFF_GS(%esp)
	mov	%fs, %edx
	mov	%edx, REGOFF_FS(%esp)
	mov	%es, %edx
	mov	%edx, REGOFF_ES(%esp)
	mov	%ds, %edx
	mov	%edx, REGOFF_DS(%esp)
#endif	/* __GNUC_AS__ */

	/*
	 * No trap is involved: zero trap number and error code, and
	 * report vpanic itself as the instruction pointer.
	 */
	movl	$0, REGOFF_TRAPNO(%esp)
	movl	$0, REGOFF_ERR(%esp)
	lea	vpanic, %ecx
	movl	%ecx, REGOFF_EIP(%esp)
#if !defined(__GNUC_AS__)
	movw	%cs, %edx
#else	/* __GNUC_AS__ */
	mov	%cs, %edx
#endif	/* __GNUC_AS__ */
	movl	%edx, REGOFF_CS(%esp)

	/*
	 * The flags are sampled here rather than at entry, so the
	 * arithmetic status bits reflect the addl above.
	 */
	pushfl
	popl	%ecx
#if defined(__xpv)
	/*
	 * Synthesize the PS_IE bit from the event mask bit
	 */
	CURTHREAD(%edx)
	KPREEMPT_DISABLE(%edx)
	EVENT_MASK_TO_IE(%edx, %ecx)
	CURTHREAD(%edx)
	KPREEMPT_ENABLE_NOKP(%edx)
#endif
	movl	%ecx, REGOFF_EFL(%esp)
	movl	$0, REGOFF_UESP(%esp)
#if !defined(__GNUC_AS__)
	movw	%ss, %edx
#else	/* __GNUC_AS__ */
	mov	%ss, %edx
#endif	/* __GNUC_AS__ */
	movl	%edx, REGOFF_SS(%esp)

	/*
	 * panicsys(format, alist, rp, on_panic_stack); arguments are
	 * pushed right to left.
	 */
	movl	%esp, %ecx			/ %ecx = &regs
	pushl	%eax				/ on_panic_stack
	pushl	%ecx				/ &regs
	pushl	12(%ebp)			/ alist
	pushl	8(%ebp)				/ format
	call	panicsys
	addl	$16, %esp			/ pop arguments

	/*
	 * If panicsys() returns: drop struct regs and unwind the save
	 * area in the reverse of the order it was pushed.
	 */
	addl	$REGSIZE, %esp
	popl	%edx
	popl	%ecx
	popl	%ebx
	popl	%eax
	leave
	ret
	SET_SIZE(vpanic)
3403
/*
 * dtrace_vpanic(format, alist) -- ia32 implementation.
 *
 * Builds the same save area as vpanic (offsets from %ebx afterwards):
 *
 *	20 return %eip   16 %ebp   12 %eax   8 %ebx   4 %ecx   0 %edx
 *
 * then calls dtrace_panic_trigger() instead of panic_trigger() and
 * continues in vpanic's shared code with the result in %eax.
 */
	ENTRY_NP(dtrace_vpanic)

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	pushl	%edx

	movl	%esp, %ebx			/ %ebx -> save area

	pushl	$panic_quiesce			/ push &panic_quiesce
	call	dtrace_panic_trigger		/ %eax = trigger result
	addl	$4, %esp			/ drop the argument
	jmp	vpanic_common			/ continue in the shared code

	SET_SIZE(dtrace_vpanic)
3422
3423#endif	/* __i386 */
3424#endif	/* __lint */
3425
3426#if defined(__lint)
3427
3428void
3429hres_tick(void)
3430{}
3431
3432int64_t timedelta;
3433hrtime_t hres_last_tick;
3434volatile timestruc_t hrestime;
3435int64_t hrestime_adj;
3436volatile int hres_lock;
3437hrtime_t hrtime_base;
3438
3439#else	/* __lint */
3440
/*
 * Clock state updated by hres_tick().  Each DGDEF3() line is followed by
 * the initial contents of the object it names.
 */

	/*
	 * hrestime: two CLONGSIZE-sized words.  hres_tick treats the first
	 * as tv_sec and the second as tv_nsec.
	 */
3441	DGDEF3(hrestime, _MUL(2, CLONGSIZE), 8)
3442	.NWORD	0, 0
3443
	/* hrestime_adj: 64-bit value, initially 0 */
3444	DGDEF3(hrestime_adj, 8, 8)
3445	.long	0, 0
3446
	/* hres_last_tick: 64-bit time recorded by the previous hres_tick */
3447	DGDEF3(hres_last_tick, 8, 8)
3448	.long	0, 0
3449
	/* timedelta: 64-bit value, initially 0 */
3450	DGDEF3(timedelta, 8, 8)
3451	.long	0, 0
3452
	/*
	 * hres_lock: hres_tick takes it by exchanging 0xff into the low
	 * byte and releases it by incrementing the whole 32-bit word.
	 */
3453	DGDEF3(hres_lock, 4, 8)
3454	.long	0
3455
3456	/*
3457	 * initialized to a non zero value to make pc_gethrtime()
3458	 * work correctly even before clock is initialized
3459	 */
3460	DGDEF3(hrtime_base, 8, 8)
3461	.long	_MUL(NSEC_PER_CLOCK_TICK, 6), 0
3462
	/* adj_shift: 32-bit word holding the constant ADJ_SHIFT */
3463	DGDEF3(adj_shift, 4, 4)
3464	.long	ADJ_SHIFT
3465
3466#if defined(__amd64)
3467
/*
 * void hres_tick(void) -- amd64 implementation.
 *
 * Reads the current time through *gethrtimef, takes hres_lock, adds the
 * time elapsed since hres_last_tick to hrtime_base and to
 * hrestime.tv_nsec, records the new hres_last_tick, calls
 * __adj_hrestime and releases the lock.
 */
	ENTRY_NP(hres_tick)
	pushq	%rbp
	movq	%rsp, %rbp

	/*
	 * *gethrtimef must be called before CLOCK_LOCK is picked up
	 * (hres_last_tick, on the other hand, may only be modified with
	 * CLOCK_LOCK held).  At worst, sampling the time this early adds
	 * some jitter to pc_gethrestime().
	 */
	call	*gethrtimef(%rip)
	movq	%rax, %r8			/* %r8 = current time */

	/*
	 * Take the lock: keep exchanging 0xff into the low byte of
	 * hres_lock until the byte that comes back is zero, spinning on
	 * plain reads in between attempts.
	 */
	leaq	hres_lock(%rip), %r9
	movb	$-1, %cl
1:
	xchgb	%cl, (%r9)
	testb	%cl, %cl
	jz	3f				/* got it */
2:
	cmpb	$0, (%r9)			/* lock byte clear yet? */
	pause
	jne	2b				/* no, keep watching */
	jmp	1b				/* yes, try again */
3:
	/*
	 * Compute the interval since the last call and apply it.
	 * hrtime_base is an 8-byte nanosecond count; hrestime is a
	 * timestruc_t (sec, nsec).
	 */
	movq	%r8, %r10
	subq	hres_last_tick(%rip), %r10	/* %r10 = interval */
	addq	%r10, hrtime_base(%rip)		/* hrtime_base += interval */
	addq	%r10, hrestime+8(%rip)		/* tv_nsec += interval */

	/*
	 * CLOCK_LOCK is held, so hres_last_tick may be updated now.
	 */
	movq	%r8, hres_last_tick(%rip)

	call	__adj_hrestime

	/*
	 * Release hres_lock.
	 */
	incl	hres_lock(%rip)
	leave
	ret
	SET_SIZE(hres_tick)
3518
3519#elif defined(__i386)
3520
/*
 * void hres_tick(void) -- ia32 implementation.
 *
 * Reads the current 64-bit time through *gethrtimef (%edx:%eax), takes
 * hres_lock, adds the time elapsed since hres_last_tick to hrtime_base
 * and hrestime.tv_nsec, records the new hres_last_tick, and then falls
 * through into the inline __adj_hrestime code, which applies a bounded
 * adjustment, normalizes tv_nsec and releases the lock.
 *
 * %esi and %ebx are saved on entry and restored before returning; they
 * carry the sampled time (%esi = high word, %ebx = low word) until
 * hres_last_tick has been updated.
 */
3521	ENTRY_NP(hres_tick)
3522	pushl	%ebp
3523	movl	%esp, %ebp
3524	pushl	%esi
3525	pushl	%ebx
3526
3527	/*
3528	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3529	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3530	 * At worst, performing this now instead of under CLOCK_LOCK may
3531	 * introduce some jitter in pc_gethrestime().
3532	 */
3533	call	*gethrtimef
3534	movl	%eax, %ebx
3535	movl	%edx, %esi
3536
	/*
	 * Take the lock: exchange 0xff into the low byte of hres_lock
	 * until the byte that comes back is zero; between attempts, spin
	 * on plain reads until the byte is seen clear.
	 */
3537	movl	$hres_lock, %eax
3538	movl	$-1, %edx
3539.CL1:
3540	xchgb	%dl, (%eax)
3541	testb	%dl, %dl
3542	jz	.CL3			/ got it
3543.CL2:
3544	cmpb	$0, (%eax)		/ possible to get lock?
3545	pause
3546	jne	.CL2
3547	jmp	.CL1			/ yes, try again
3548.CL3:
3549	/*
3550	 * compute the interval since last time hres_tick was called
3551	 * and adjust hrtime_base and hrestime accordingly
3552	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3553	 * timestruc_t (sec, nsec)
3554	 */
3555
3556	lea	hres_last_tick, %eax
3557
3558	movl	%ebx, %edx
3559	movl	%esi, %ecx
3560
	/* %ecx:%edx = sampled time - hres_last_tick (64-bit subtract) */
3561	subl 	(%eax), %edx
3562	sbbl 	4(%eax), %ecx
3563
3564	addl	%edx, hrtime_base	/ add interval to hrtime_base
3565	adcl	%ecx, hrtime_base+4
3566
	/* only the low word of the interval is added to tv_nsec */
3567	addl 	%edx, hrestime+4	/ add interval to hrestime.tv_nsec
3568
3569	/
3570	/ Now that we have CLOCK_LOCK, we can update hres_last_tick.
3571	/
3572	movl	%ebx, (%eax)
3573	movl	%esi,  4(%eax)
3574
3575	/ get hrestime at this moment. used as base for pc_gethrestime
3576	/
3577	/ Apply adjustment, if any
3578	/
3579	/ #define HRES_ADJ	(NSEC_PER_CLOCK_TICK >> ADJ_SHIFT)
3580	/ (max_hres_adj)
3581	/
3582	/ void
3583	/ adj_hrestime()
3584	/ {
3585	/	long long adj;
3586	/
3587	/	if (hrestime_adj == 0)
3588	/		adj = 0;
3589	/	else if (hrestime_adj > 0) {
3590	/		if (hrestime_adj < HRES_ADJ)
3591	/			adj = hrestime_adj;
3592	/		else
3593	/			adj = HRES_ADJ;
3594	/	}
3595	/	else {
3596	/		if (hrestime_adj < -(HRES_ADJ))
3597	/			adj = -(HRES_ADJ);
3598	/		else
3599	/			adj = hrestime_adj;
3600	/	}
3601	/
3602	/	timedelta -= adj;
3603	/	hrestime_adj = timedelta;
3604	/	hrestime.tv_nsec += adj;
3605	/
3606	/	while (hrestime.tv_nsec >= NANOSEC) {
3607	/		one_sec++;
3608	/		hrestime.tv_sec++;
3609	/		hrestime.tv_nsec -= NANOSEC;
3610	/	}
3611	/ }
	/*
	 * In this ia32 build the label below is reached by falling
	 * through, not by a call.  From here %edx:%esi holds hrestime_adj,
	 * and every path arrives at .CL5 with the 64-bit adj in %edx:%ecx.
	 */
3612__adj_hrestime:
3613	movl	hrestime_adj, %esi	/ if (hrestime_adj == 0)
3614	movl	hrestime_adj+4, %edx
3615	andl	%esi, %esi
3616	jne	.CL4			/ no
3617	andl	%edx, %edx
3618	jne	.CL4			/ no
3619	subl	%ecx, %ecx		/ yes, adj = 0;
3620	subl	%edx, %edx
3621	jmp	.CL5
3622.CL4:
	/*
	 * %eax:%ecx = 0 - hrestime_adj.  hrestime_adj is non-zero here,
	 * so a non-negative high word of the negation means hrestime_adj
	 * is negative (jge goes to .CL6); falling through means it is
	 * positive.
	 */
3623	subl	%ecx, %ecx
3624	subl	%eax, %eax
3625	subl	%esi, %ecx
3626	sbbl	%edx, %eax
3627	andl	%eax, %eax		/ if (hrestime_adj > 0)
3628	jge	.CL6
3629
3630	/ In the following comments, HRES_ADJ is used, while in the code
3631	/ max_hres_adj is used.
3632	/
3633	/ The test for "hrestime_adj < HRES_ADJ" is complicated because
3634	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3635	/ on the logical equivalence of:
3636	/
3637	/	!(hrestime_adj < HRES_ADJ)
3638	/
3639	/ and the two step sequence:
3640	/
3641	/	(HRES_ADJ - lsw(hrestime_adj)) generates a Borrow/Carry
3642	/
3643	/ which computes whether or not the least significant 32-bits
3644	/ of hrestime_adj is greater than HRES_ADJ, followed by:
3645	/
3646	/	Previous Borrow/Carry + -1 + msw(hrestime_adj) generates a Carry
3647	/
3648	/ which generates a carry whenever step 1 is true or the most
3649	/ significant long of the longlong hrestime_adj is non-zero.
3650
3651	movl	max_hres_adj, %ecx	/ hrestime_adj is positive
3652	subl	%esi, %ecx
3653	movl	%edx, %eax
3654	adcl	$-1, %eax
3655	jnc	.CL7
3656	movl	max_hres_adj, %ecx	/ adj = HRES_ADJ;
3657	subl	%edx, %edx
3658	jmp	.CL5
3659
3660	/ The following computation is similar to the one above.
3661	/
3662	/ The test for "hrestime_adj < -(HRES_ADJ)" is complicated because
3663	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3664	/ on the logical equivalence of:
3665	/
3666	/	(hrestime_adj > -HRES_ADJ)
3667	/
3668	/ and the two step sequence:
3669	/
3670	/	(HRES_ADJ + lsw(hrestime_adj)) generates a Carry
3671	/
3672	/ which means the least significant 32-bits of hrestime_adj is
3673	/ greater than -HRES_ADJ, followed by:
3674	/
3675	/	Previous Carry + 0 + msw(hrestime_adj) generates a Carry
3676	/
3677	/ which generates a carry only when step 1 is true and the most
3678	/ significant long of the longlong hrestime_adj is -1.
3679
3680.CL6:					/ hrestime_adj is negative
3681	movl	%esi, %ecx
3682	addl	max_hres_adj, %ecx
3683	movl	%edx, %eax
3684	adcl	$0, %eax
3685	jc	.CL7
3686	xor	%ecx, %ecx
3687	subl	max_hres_adj, %ecx	/ adj = -(HRES_ADJ);
3688	movl	$-1, %edx
3689	jmp	.CL5
3690.CL7:
	/* %edx still holds the high word of hrestime_adj loaded above */
3691	movl	%esi, %ecx		/ adj = hrestime_adj;
3692.CL5:
3693	movl	timedelta, %esi
3694	subl	%ecx, %esi
3695	movl	timedelta+4, %eax
3696	sbbl	%edx, %eax
3697	movl	%esi, timedelta
3698	movl	%eax, timedelta+4	/ timedelta -= adj;
3699	movl	%esi, hrestime_adj
3700	movl	%eax, hrestime_adj+4	/ hrestime_adj = timedelta;
	/* %ecx = hrestime.tv_nsec + low word of adj */
3701	addl	hrestime+4, %ecx
3702
3703	movl	%ecx, %eax		/ eax = tv_nsec
37041:
3705	cmpl	$NANOSEC, %eax		/ if ((unsigned long)tv_nsec >= NANOSEC)
3706	jb	.CL8			/ no
3707	incl	one_sec			/ yes,  one_sec++;
3708	incl	hrestime		/ hrestime.tv_sec++;
3709	addl	$-NANOSEC, %eax		/ tv_nsec -= NANOSEC
3710	jmp	1b			/ check for more seconds
3711
3712.CL8:
3713	movl	%eax, hrestime+4	/ store final into hrestime.tv_nsec
3714	incl	hres_lock		/ release the hres_lock
3715
	/* restore in the reverse of the order pushed at entry */
3716	popl	%ebx
3717	popl	%esi
3718	leave
3719	ret
3720	SET_SIZE(hres_tick)
3721
3722#endif	/* __i386 */
3723#endif	/* __lint */
3724
3725/*
3726 * void prefetch_smap_w(void *)
3727 *
3728 * Prefetch ahead within a linear list of smap structures.
3729 * Not implemented for ia32.  Stub for compatibility.
3730 */
3731
3732#if defined(__lint)
3733
/*
 * Lint-only stand-in for the assembly prefetch_smap_w(); does nothing.
 */
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{
}
3737
3738#else	/* __lint */
3739
/*
 * void prefetch_smap_w(void *) -- stub: returns immediately without
 * looking at its argument.
 */
	ENTRY(prefetch_smap_w)
	/*
	 * Two-byte return, for use when the return is a branch target
	 * (AMD Software Optimization Guide - Section 6.2).
	 */
	rep;	ret
	SET_SIZE(prefetch_smap_w)
3744
3745#endif	/* __lint */
3746
3747/*
3748 * prefetch_page_r(page_t *)
3749 * Intended to prefetch a page_t; implemented here as a no-op stub.
3750 */
3751#if defined(__lint)
3752
/*
 * Lint-only stand-in for the assembly prefetch_page_r(); does nothing.
 */
/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{
}
3757
3758#else	/* __lint */
3759
/*
 * void prefetch_page_r(void *) -- stub: returns immediately without
 * looking at its argument.
 */
	ENTRY(prefetch_page_r)
	/*
	 * Two-byte return, for use when the return is a branch target
	 * (AMD Software Optimization Guide - Section 6.2).
	 */
	rep;	ret
	SET_SIZE(prefetch_page_r)
3764
3765#endif	/* __lint */
3766
3767#if defined(__lint)
3768
/*
 * Lint-only stand-in for the assembly bcmp(); ignores its arguments and
 * always returns 0.
 */
/*ARGSUSED*/
int
bcmp(const void *s1, const void *s2, size_t count)
{
	return (0);
}
3773
3774#else   /* __lint */
3775
3776#if defined(__amd64)
3777
/*
 * int bcmp(const void *s1, const void *s2, size_t count) -- amd64.
 *
 * In:	%rdi = s1, %rsi = s2, %rdx = count (passed through to memcmp)
 * Out:	%eax = 0 if memcmp() returned 0, otherwise 1
 *
 * DEBUG kernels panic unless both s1 and s2 are at or above
 * postbootkernelbase.
 */
	ENTRY(bcmp)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	cmpq	postbootkernelbase(%rip), %rdi
	jb	0f				/* s1 below the base */
	cmpq	postbootkernelbase(%rip), %rsi
	jae	1f				/* both addresses pass */
0:	leaq	.bcmp_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
1:
#endif	/* DEBUG */
	call	memcmp
	testl	%eax, %eax
	setne	%al
	movzbl	%al, %eax			/* collapse result to 0 or 1 */
	leave
	ret
	SET_SIZE(bcmp)
3799
3800#elif defined(__i386)
3801
/*
 * int bcmp(const void *s1, const void *s2, size_t count) -- ia32.
 *
 * Returns 0 in %eax when s1 == s2 or the first count bytes match, and 1
 * otherwise.  Four bytes are compared at a time while at least four
 * remain; any remainder, and the four bytes of a word that did not
 * match, are compared one byte at a time.
 *
 * DEBUG kernels panic unless both s1 and s2 are at or above
 * postbootkernelbase.
 *
 * Argument offsets from %ebp once the frame is set up:
 */
#define	ARG_S1		8
#define	ARG_S2		12
#define	ARG_LENGTH	16

	ENTRY(bcmp)
	pushl	%ebp
	movl	%esp, %ebp		/ set up the frame
#ifdef DEBUG
	movl    postbootkernelbase, %eax
	cmpl    %eax, ARG_S1(%ebp)
	jb	0f			/ s1 below the base
	cmpl    %eax, ARG_S2(%ebp)
	jae	1f			/ both addresses pass
0:	pushl   $.bcmp_panic_msg
	call    panic
1:
#endif	/* DEBUG */

	pushl	%edi			/ %edi is used as the byte counter
	movl	ARG_S1(%ebp), %eax	/ %eax = s1
	movl	ARG_S2(%ebp), %ecx	/ %ecx = s2
	cmpl	%eax, %ecx
	je	.equal			/ same address: trivially equal
	movl	ARG_LENGTH(%ebp), %edi	/ %edi = bytes left to compare
	cmpl	$4, %edi
	jb	.byte_check		/ fewer than 4: bytes only
	.align	4
.word_loop:
	movl	(%ecx), %edx		/ %edx = next 4 bytes of s2
	subl	$4, %edi		/ count them as consumed
	cmpl	(%eax), %edx		/ against next 4 bytes of s1
	jne	.word_not_equal
	addl	$4, %ecx		/ advance both pointers
	addl	$4, %eax
	cmpl	$4, %edi
	jae	.word_loop		/ at least 4 bytes still left
.byte_check:
	testl	%edi, %edi
	jz	.equal			/ nothing left: equal
	jmp	.byte_loop
.word_not_equal:
	addl	$4, %edi		/ give back the 4 bytes just counted
	.align	4
.byte_loop:
	movb	(%eax), %dl		/ %dl = next byte of s1
	cmpb	(%ecx), %dl		/ against next byte of s2
	jne	.not_equal
	incl	%ecx			/ advance both pointers
	incl	%eax
	decl	%edi
	jnz	.byte_loop		/ more bytes left
.equal:
	xorl	%eax, %eax		/ return (0)
	popl	%edi
	leave
	ret
	.align	4
.not_equal:
	movl	$1, %eax		/ return (1)
	popl	%edi
	leave
	ret
	SET_SIZE(bcmp)
3865
3866#endif	/* __i386 */
3867
3868#ifdef DEBUG
3869	.text
	/*
	 * Message handed to panic() by the DEBUG-only address checks in
	 * bcmp above.
	 */
3870.bcmp_panic_msg:
3871	.string "bcmp: arguments below kernelbase"
3872#endif	/* DEBUG */
3873
3874#endif	/* __lint */
3875
3876#if defined(__lint)
3877
3878uint_t
3879bsrw_insn(uint16_t mask)
3880{
3881	uint_t index = sizeof (mask) * NBBY - 1;
3882
3883	while ((mask & (1 << index)) == 0)
3884		index--;
3885	return (index);
3886}
3887
3888#else	/* __lint */
3889
3890#if defined(__amd64)
3891
/*
 * uint_t bsrw_insn(uint16_t mask) -- amd64 implementation.
 *
 * In:	%di = mask
 * Out:	%eax = index of the most significant set bit of mask
 *
 * %eax is cleared first, so the upper 16 bits of the result are zero.
 */
	ENTRY_NP(bsrw_insn)
	xorl	%eax, %eax
	bsrw	%di, %ax		/* %ax = highest set bit index */
	ret
	SET_SIZE(bsrw_insn)
3897
3898#elif defined(__i386)
3899
/*
 * uint_t bsrw_insn(uint16_t mask) -- ia32 implementation.
 *
 * In:	low 16 bits of 4(%esp) = mask
 * Out:	%eax = index of the most significant set bit of mask
 *
 * %eax is cleared first, so the upper 16 bits of the result are zero.
 */
	ENTRY_NP(bsrw_insn)
	xorl	%eax, %eax
	movw	4(%esp), %dx		/* %dx = mask */
	bsrw	%dx, %ax		/* %ax = highest set bit index */
	ret
	SET_SIZE(bsrw_insn)
3906
3907#endif	/* __i386 */
3908#endif	/* __lint */
3909
3910#if defined(__lint)
3911
3912uint_t
3913atomic_btr32(uint32_t *pending, uint_t pil)
3914{
3915	return (*pending &= ~(1 << pil));
3916}
3917
3918#else	/* __lint */
3919
3920#if defined(__i386)
3921
/*
 * uint_t atomic_btr32(uint32_t *pending, uint_t pil) -- ia32.
 *
 * In:	4(%esp) = pending, 8(%esp) = pil
 * Out:	%eax = 1 if bit pil was set before being cleared, otherwise 0
 *
 * The bit is tested and cleared by a single locked btrl.
 */
	ENTRY_NP(atomic_btr32)
	movl	4(%esp), %edx		/* %edx = pending */
	movl	8(%esp), %ecx		/* %ecx = pil */
	lock
	btrl	%ecx, (%edx)		/* CF = old bit; bit cleared */
	setc	%al
	movzbl	%al, %eax		/* widen CF to the return value */
	ret
	SET_SIZE(atomic_btr32)
3931
3932#endif	/* __i386 */
3933#endif	/* __lint */
3934
3935#if defined(__lint)
3936
3937/*ARGSUSED*/
3938void
3939switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t), uint_t arg1,
3940	    uint_t arg2)
3941{}
3942
3943#else	/* __lint */
3944
3945#if defined(__amd64)
3946
/*
 * void switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t),
 *     uint_t arg1, uint_t arg2) -- amd64 implementation.
 *
 * In:	%rdi = newsp, %rsi = func, %rdx = arg1, %rcx = arg2
 *
 * Calls func(arg1, arg2) with %rsp set to newsp.  The old stack pointer
 * is kept in %rbp and restored by the leave.
 */
	ENTRY_NP(switch_sp_and_call)
	pushq	%rbp
	movq	%rsp, %rbp		/* frame remembers the old stack */
	movq	%rsi, %r11		/* %r11 = func */
	movq	%rdi, %rsp		/* now running on newsp */
	movq	%rdx, %rdi		/* first argument for func */
	movq	%rcx, %rsi		/* second argument for func */
	call	*%r11
	leave				/* back to the old stack */
	ret
	SET_SIZE(switch_sp_and_call)
3958
3959#elif defined(__i386)
3960
/*
 * void switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t),
 *     uint_t arg1, uint_t arg2) -- ia32 implementation.
 *
 * After the frame is set up: 8(%ebp) = newsp, 12(%ebp) = func,
 * 16(%ebp) = arg1, 20(%ebp) = arg2.
 *
 * Calls func(arg1, arg2) with %esp set to newsp.  The old stack pointer
 * is kept in %ebp and restored by the leave.
 */
	ENTRY_NP(switch_sp_and_call)
	pushl	%ebp
	mov	%esp, %ebp		/* frame remembers the old stack */
	movl	12(%ebp), %eax		/* %eax = func */
	movl	8(%ebp), %esp		/* now running on newsp */
	pushl	20(%ebp)		/* second argument for func */
	pushl	16(%ebp)		/* first argument for func */
	call	*%eax
	addl	$8, %esp		/* pop arguments */
	leave				/* back to the old stack */
	ret
	SET_SIZE(switch_sp_and_call)
3972
3973#endif	/* __i386 */
3974#endif	/* __lint */
3975
3976#if defined(__lint)
3977
/*
 * Lint-only stand-in for the assembly kmdb_enter(); does nothing.
 */
void
kmdb_enter(void)
{
}
3981
3982#else	/* __lint */
3983
3984#if defined(__amd64)
3985
/*
 * void kmdb_enter(void) -- amd64 implementation.
 *
 * Raises software interrupt T_DBGENTR, bracketed by intr_clear() and
 * intr_restore().
 */
	ENTRY_NP(kmdb_enter)
	pushq	%rbp
	movq	%rsp, %rbp

	/*
	 * intr_clear() saves the flags, does a 'cli' and hands the saved
	 * flags back in %rax.
	 */
	call	intr_clear

	int	$T_DBGENTR

	/*
	 * Pass the saved flags back to intr_restore().
	 */
	movq	%rax, %rdi
	call	intr_restore

	leave
	ret
	SET_SIZE(kmdb_enter)
4006
4007#elif defined(__i386)
4008
/*
 * void kmdb_enter(void) -- ia32 implementation.
 *
 * Raises software interrupt T_DBGENTR, bracketed by intr_clear() and
 * intr_restore().
 */
	ENTRY_NP(kmdb_enter)
	pushl	%ebp
	movl	%esp, %ebp

	/*
	 * intr_clear() saves the flags, does a 'cli' and hands the saved
	 * flags back in %eax.
	 */
	call	intr_clear

	int	$T_DBGENTR

	/*
	 * Pass the saved flags back to intr_restore() on the stack.
	 */
	pushl	%eax
	call	intr_restore
	addl	$4, %esp

	leave
	ret
	SET_SIZE(kmdb_enter)
4030
4031#endif	/* __i386 */
4032#endif	/* __lint */
4033
4034#if defined(__lint)
4035
/*
 * Lint-only stand-in for the assembly return_instr(); does nothing.
 */
void
return_instr(void)
{
}
4039
4040#else	/* __lint */
4041
/*
 * void return_instr(void) -- a routine consisting of nothing but a
 * return.
 */
	ENTRY_NP(return_instr)
	/*
	 * Two-byte return, for use when the return is a branch target
	 * (AMD Software Optimization Guide - Section 6.2).
	 */
	rep;	ret
	SET_SIZE(return_instr)
4046
4047#endif	/* __lint */
4048
4049#if defined(__lint)
4050
4051ulong_t
4052getflags(void)
4053{
4054	return (0);
4055}
4056
4057#else	/* __lint */
4058
4059#if defined(__amd64)
4060
/*
 * ulong_t getflags(void) -- amd64 implementation.
 *
 * Out:	%rax = current %rflags
 *
 * Under __xpv the PS_IE bit of the result is replaced by one derived
 * from the vcpu's upcall mask, with kernel preemption disabled around
 * the test.
 */
	ENTRY(getflags)
	pushfq
	popq	%rax			/* %rax = %rflags */
#if defined(__xpv)
	CURTHREAD(%rdi)
	KPREEMPT_DISABLE(%rdi)
	/*
	 * PS_IE is cleared, then set again only if the upcall mask test
	 * reports zero.
	 */
	CURVCPU(%r11)
	andq    $_BITNOT(PS_IE), %rax
	XEN_TEST_UPCALL_MASK(%r11)
	jnz	1f
	orq	$PS_IE, %rax
1:
	KPREEMPT_ENABLE_NOKP(%rdi)
#endif
	ret
	SET_SIZE(getflags)
4080
4081#elif defined(__i386)
4082
/*
 * ulong_t getflags(void) -- ia32 implementation.
 *
 * Out:	%eax = current %eflags
 *
 * Under __xpv the PS_IE bit of the result is replaced by one derived
 * from the vcpu's upcall mask, with kernel preemption disabled around
 * the test.
 */
	ENTRY(getflags)
	pushfl
	popl	%eax			/* %eax = %eflags */
#if defined(__xpv)
	CURTHREAD(%ecx)
	KPREEMPT_DISABLE(%ecx)
	/*
	 * PS_IE is cleared, then set again only if the upcall mask test
	 * reports zero.
	 */
	CURVCPU(%edx)
	andl    $_BITNOT(PS_IE), %eax
	XEN_TEST_UPCALL_MASK(%edx)
	jnz	1f
	orl	$PS_IE, %eax
1:
	KPREEMPT_ENABLE_NOKP(%ecx)
#endif
	ret
	SET_SIZE(getflags)
4102
4103#endif	/* __i386 */
4104
4105#endif	/* __lint */
4106
4107#if defined(__lint)
4108
4109ftrace_icookie_t
4110ftrace_interrupt_disable(void)
4111{ return (0); }
4112
4113#else   /* __lint */
4114
4115#if defined(__amd64)
4116
/*
 * ftrace_icookie_t ftrace_interrupt_disable(void) -- amd64.
 *
 * Out:	%rax = %rflags as they were before the CLI() below
 *
 * The returned value is what ftrace_interrupt_enable() later loads back
 * into the flags register.
 */
	ENTRY(ftrace_interrupt_disable)
	pushfq
	popq	%rax			/* cookie = current %rflags */
	CLI(%rdx)			/* %rdx is handed to the macro */
	ret
	SET_SIZE(ftrace_interrupt_disable)
4123
4124#elif defined(__i386)
4125
/*
 * ftrace_icookie_t ftrace_interrupt_disable(void) -- ia32.
 *
 * Out:	%eax = %eflags as they were before the CLI() below
 *
 * The returned value is what ftrace_interrupt_enable() later loads back
 * into the flags register.
 */
	ENTRY(ftrace_interrupt_disable)
	pushfl
	popl	%eax			/* cookie = current %eflags */
	CLI(%edx)			/* %edx is handed to the macro */
	ret
	SET_SIZE(ftrace_interrupt_disable)
4132
4133#endif	/* __i386 */
4134#endif	/* __lint */
4135
4136#if defined(__lint)
4137
4138/*ARGSUSED*/
4139void
4140ftrace_interrupt_enable(ftrace_icookie_t cookie)
4141{}
4142
4143#else	/* __lint */
4144
4145#if defined(__amd64)
4146
/*
 * void ftrace_interrupt_enable(ftrace_icookie_t cookie) -- amd64.
 *
 * In:	%rdi = cookie
 *
 * Loads the cookie into %rflags by way of the stack.
 */
	ENTRY(ftrace_interrupt_enable)
	pushq	%rdi			/* cookie ... */
	popfq				/* ... becomes %rflags */
	ret
	SET_SIZE(ftrace_interrupt_enable)
4152
4153#elif defined(__i386)
4154
/*
 * void ftrace_interrupt_enable(ftrace_icookie_t cookie) -- ia32.
 *
 * In:	4(%esp) = cookie
 *
 * Loads the cookie into %eflags by way of the stack.
 */
	ENTRY(ftrace_interrupt_enable)
	movl	4(%esp), %ecx		/* %ecx = cookie */
	pushl	%ecx
	popfl				/* cookie becomes %eflags */
	ret
	SET_SIZE(ftrace_interrupt_enable)
4161
4162#endif	/* __i386 */
4163#endif	/* __lint */
4164