xref: /titanic_41/usr/src/uts/intel/ia32/ml/i86_subr.s (revision c6c9aed4d309e3d11be652b85e3bf8bb72f20c87)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 *  Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
29 *  Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T
30 *    All Rights Reserved
31 */
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"
34
35/*
36 * General assembly language routines.
37 * It is the intent of this file to contain routines that are
38 * independent of the specific kernel architecture, and those that are
39 * common across kernel architectures.
40 * As architectures diverge, and implementations of specific
41 * architecture-dependent routines change, the routines should be moved
42 * from this file into the respective ../`arch -k`/subr.s file.
43 */
44
45#include <sys/asm_linkage.h>
46#include <sys/asm_misc.h>
47#include <sys/panic.h>
48#include <sys/ontrap.h>
49#include <sys/regset.h>
50#include <sys/privregs.h>
51#include <sys/reboot.h>
52#include <sys/psw.h>
53#include <sys/x86_archext.h>
54
55#if defined(__lint)
56#include <sys/types.h>
57#include <sys/systm.h>
58#include <sys/thread.h>
59#include <sys/archsystm.h>
60#include <sys/byteorder.h>
61#include <sys/dtrace.h>
62#include <sys/ftrace.h>
63#else	/* __lint */
64#include "assym.h"
65#endif	/* __lint */
66#include <sys/dditypes.h>
67
68/*
69 * on_fault()
70 * Catch lofault faults. Like setjmp except it returns one
71 * if code following causes uncorrectable fault. Turned off
72 * by calling no_fault().
73 */
74
75#if defined(__lint)
76
77/* ARGSUSED */
78int
79on_fault(label_t *ljb)
80{ return (0); }
81
82void
83no_fault(void)
84{}
85
86#else	/* __lint */
87
88#if defined(__amd64)
89
90	ENTRY(on_fault)
91	movq	%gs:CPU_THREAD, %rsi
92	leaq	catch_fault(%rip), %rdx
93	movq	%rdi, T_ONFAULT(%rsi)		/* jumpbuf in t_onfault */
94	movq	%rdx, T_LOFAULT(%rsi)		/* catch_fault in t_lofault */
95	jmp	setjmp				/* let setjmp do the rest */
96
97catch_fault:
98	movq	%gs:CPU_THREAD, %rsi
99	movq	T_ONFAULT(%rsi), %rdi		/* address of save area */
100	xorl	%eax, %eax
101	movq	%rax, T_ONFAULT(%rsi)		/* turn off onfault */
102	movq	%rax, T_LOFAULT(%rsi)		/* turn off lofault */
103	jmp	longjmp				/* let longjmp do the rest */
104	SET_SIZE(on_fault)
105
106	ENTRY(no_fault)
107	movq	%gs:CPU_THREAD, %rsi
108	xorl	%eax, %eax
109	movq	%rax, T_ONFAULT(%rsi)		/* turn off onfault */
110	movq	%rax, T_LOFAULT(%rsi)		/* turn off lofault */
111	ret
112	SET_SIZE(no_fault)
113
114#elif defined(__i386)
115
116	ENTRY(on_fault)
117	movl	%gs:CPU_THREAD, %edx
118	movl	4(%esp), %eax			/* jumpbuf address */
119	leal	catch_fault, %ecx
120	movl	%eax, T_ONFAULT(%edx)		/* jumpbuf in t_onfault */
121	movl	%ecx, T_LOFAULT(%edx)		/* catch_fault in t_lofault */
122	jmp	setjmp				/* let setjmp do the rest */
123
124catch_fault:
125	movl	%gs:CPU_THREAD, %edx
126	xorl	%eax, %eax
127	movl	T_ONFAULT(%edx), %ecx		/* address of save area */
128	movl	%eax, T_ONFAULT(%edx)		/* turn off onfault */
129	movl	%eax, T_LOFAULT(%edx)		/* turn off lofault */
130	pushl	%ecx
131	call	longjmp				/* let longjmp do the rest */
132	SET_SIZE(on_fault)
133
134	ENTRY(no_fault)
135	movl	%gs:CPU_THREAD, %edx
136	xorl	%eax, %eax
137	movl	%eax, T_ONFAULT(%edx)		/* turn off onfault */
138	movl	%eax, T_LOFAULT(%edx)		/* turn off lofault */
139	ret
140	SET_SIZE(no_fault)
141
142#endif	/* __i386 */
143#endif	/* __lint */
144
145/*
146 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  We just
147 * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called.
148 */
149
#if defined(__lint)

void
on_trap_trampoline(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * Computes &curthread->t_ontrap->ot_jmpbuf in %rdi and tail-jumps
	 * to longjmp() with it.
	 */
	ENTRY(on_trap_trampoline)
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONTRAP(%rsi), %rdi		/* %rdi = curthread->t_ontrap */
	addq	$OT_JMPBUF, %rdi		/* %rdi = &t_ontrap->ot_jmpbuf */
	jmp	longjmp				/* finish as longjmp(%rdi) */
	SET_SIZE(on_trap_trampoline)

#elif defined(__i386)

	/*
	 * Computes &curthread->t_ontrap->ot_jmpbuf, pushes it as the
	 * argument and calls longjmp().
	 */
	ENTRY(on_trap_trampoline)
	movl	%gs:CPU_THREAD, %eax		/* %eax = curthread */
	movl	T_ONTRAP(%eax), %eax		/* %eax = curthread->t_ontrap */
	addl	$OT_JMPBUF, %eax		/* %eax = &t_ontrap->ot_jmpbuf */
	pushl	%eax				/* argument for longjmp */
	call	longjmp				/* reloads %esp; no return here */
	SET_SIZE(on_trap_trampoline)

#endif	/* __i386 */
#endif	/* __lint */
179
180/*
181 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
182 * more information about the on_trap() mechanism.  If the on_trap_data is the
183 * same as the topmost stack element, we just modify that element.
184 */
#if defined(__lint)

/*ARGSUSED*/
int
on_trap(on_trap_data_t *otp, uint_t prot)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *	%rdi = otp, %esi = prot
	 *
	 * Initializes *otp, links it onto curthread->t_ontrap unless it is
	 * already the top element, then tail-jumps to setjmp(&otp->ot_jmpbuf)
	 * so that setjmp's return value becomes ours.
	 */
	ENTRY(on_trap)
	movw	%si, OT_PROT(%rdi)		/* ot_prot = prot (low 16 bits) */
	movw	$0, OT_TRAP(%rdi)		/* ot_trap = 0 */
	leaq	on_trap_trampoline(%rip), %rdx	/* rdx = &on_trap_trampoline */
	movq	%rdx, OT_TRAMPOLINE(%rdi)	/* ot_trampoline = rdx */
	xorl	%ecx, %ecx			/* rcx = 0 (upper half too) */
	movq	%rcx, OT_HANDLE(%rdi)		/* ot_handle = NULL */
	movq	%rcx, OT_PAD1(%rdi)		/* ot_pad1 = NULL */
	movq	%gs:CPU_THREAD, %rdx		/* rdx = curthread */
	movq	T_ONTRAP(%rdx), %rcx		/* rcx = curthread->t_ontrap */
	cmpq	%rdi, %rcx			/* if (otp == %rcx)	*/
	je	0f				/*	don't modify t_ontrap */

	movq	%rcx, OT_PREV(%rdi)		/* ot_prev = t_ontrap */
	movq	%rdi, T_ONTRAP(%rdx)		/* curthread->t_ontrap = otp */

0:	addq	$OT_JMPBUF, %rdi		/* &ot_jmpbuf */
	jmp	setjmp				/* finish as setjmp(&ot_jmpbuf) */
	SET_SIZE(on_trap)

#elif defined(__i386)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *	otp at 4(%esp), prot at 8(%esp)
	 *
	 * Same logic as the amd64 version.  The otp argument slot on the
	 * stack is overwritten with &otp->ot_jmpbuf before jumping to
	 * setjmp(), which reads its argument from that slot.
	 */
	ENTRY(on_trap)
	movl	4(%esp), %eax			/* %eax = otp */
	movl	8(%esp), %edx			/* %edx = prot */

	movw	%dx, OT_PROT(%eax)		/* ot_prot = prot (low 16 bits) */
	movw	$0, OT_TRAP(%eax)		/* ot_trap = 0 */
	leal	on_trap_trampoline, %edx	/* %edx = &on_trap_trampoline */
	movl	%edx, OT_TRAMPOLINE(%eax)	/* ot_trampoline = %edx */
	movl	$0, OT_HANDLE(%eax)		/* ot_handle = NULL */
	movl	$0, OT_PAD1(%eax)		/* ot_pad1 = NULL */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONTRAP(%edx), %ecx		/* %ecx = curthread->t_ontrap */
	cmpl	%eax, %ecx			/* if (otp == %ecx) */
	je	0f				/*    don't modify t_ontrap */

	movl	%ecx, OT_PREV(%eax)		/* ot_prev = t_ontrap */
	movl	%eax, T_ONTRAP(%edx)		/* curthread->t_ontrap = otp */

0:	addl	$OT_JMPBUF, %eax		/* %eax = &ot_jmpbuf */
	movl	%eax, 4(%esp)			/* put %eax back on the stack */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#endif	/* __i386 */
#endif	/* __lint */
243
244/*
245 * Setjmp and longjmp implement non-local gotos using state vectors
246 * type label_t.
247 */
248
249#if defined(__lint)
250
251/* ARGSUSED */
252int
253setjmp(label_t *lp)
254{ return (0); }
255
256/* ARGSUSED */
257void
258longjmp(label_t *lp)
259{}
260
261#else	/* __lint */
262
263#if LABEL_PC != 0
264#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
265#endif	/* LABEL_PC != 0 */
266
267#if defined(__amd64)
268
269	ENTRY(setjmp)
270	movq	%rsp, LABEL_SP(%rdi)
271	movq	%rbp, LABEL_RBP(%rdi)
272	movq	%rbx, LABEL_RBX(%rdi)
273	movq	%r12, LABEL_R12(%rdi)
274	movq	%r13, LABEL_R13(%rdi)
275	movq	%r14, LABEL_R14(%rdi)
276	movq	%r15, LABEL_R15(%rdi)
277	movq	(%rsp), %rdx		/* return address */
278	movq	%rdx, (%rdi)		/* LABEL_PC is 0 */
279	xorl	%eax, %eax		/* return 0 */
280	ret
281	SET_SIZE(setjmp)
282
283	ENTRY(longjmp)
284	movq	LABEL_SP(%rdi), %rsp
285	movq	LABEL_RBP(%rdi), %rbp
286	movq	LABEL_RBX(%rdi), %rbx
287	movq	LABEL_R12(%rdi), %r12
288	movq	LABEL_R13(%rdi), %r13
289	movq	LABEL_R14(%rdi), %r14
290	movq	LABEL_R15(%rdi), %r15
291	movq	(%rdi), %rdx		/* return address; LABEL_PC is 0 */
292	movq	%rdx, (%rsp)
293	xorl	%eax, %eax
294	incl	%eax			/* return 1 */
295	ret
296	SET_SIZE(longjmp)
297
298#elif defined(__i386)
299
300	ENTRY(setjmp)
301	movl	4(%esp), %edx		/* address of save area */
302	movl	%ebp, LABEL_EBP(%edx)
303	movl	%ebx, LABEL_EBX(%edx)
304	movl	%esi, LABEL_ESI(%edx)
305	movl	%edi, LABEL_EDI(%edx)
306	movl	%esp, 4(%edx)
307	movl	(%esp), %ecx		/* %eip (return address) */
308	movl	%ecx, (%edx)		/* LABEL_PC is 0 */
309	subl	%eax, %eax		/* return 0 */
310	ret
311	SET_SIZE(setjmp)
312
313	ENTRY(longjmp)
314	movl	4(%esp), %edx		/* address of save area */
315	movl	LABEL_EBP(%edx), %ebp
316	movl	LABEL_EBX(%edx), %ebx
317	movl	LABEL_ESI(%edx), %esi
318	movl	LABEL_EDI(%edx), %edi
319	movl	4(%edx), %esp
320	movl	(%edx), %ecx		/* %eip (return addr); LABEL_PC is 0 */
321	movl	$1, %eax
322	addl	$4, %esp		/* pop ret adr */
323	jmp	*%ecx			/* indirect */
324	SET_SIZE(longjmp)
325
326#endif	/* __i386 */
327#endif	/* __lint */
328
329/*
330 * if a() calls b() calls caller(),
331 * caller() returns return address in a().
332 * (Note: We assume a() and b() are C routines which do the normal entry/exit
333 *  sequence.)
334 */
335
336#if defined(__lint)
337
338caddr_t
339caller(void)
340{ return (0); }
341
342#else	/* __lint */
343
344#if defined(__amd64)
345
346	ENTRY(caller)
347	movq	8(%rbp), %rax		/* b()'s return pc, in a() */
348	ret
349	SET_SIZE(caller)
350
351#elif defined(__i386)
352
353	ENTRY(caller)
354	movl	4(%ebp), %eax		/* b()'s return pc, in a() */
355	ret
356	SET_SIZE(caller)
357
358#endif	/* __i386 */
359#endif	/* __lint */
360
361/*
362 * if a() calls callee(), callee() returns the
363 * return address in a();
364 */
365
366#if defined(__lint)
367
368caddr_t
369callee(void)
370{ return (0); }
371
372#else	/* __lint */
373
374#if defined(__amd64)
375
376	ENTRY(callee)
377	movq	(%rsp), %rax		/* callee()'s return pc, in a() */
378	ret
379	SET_SIZE(callee)
380
381#elif defined(__i386)
382
383	ENTRY(callee)
384	movl	(%esp), %eax		/* callee()'s return pc, in a() */
385	ret
386	SET_SIZE(callee)
387
388#endif	/* __i386 */
389#endif	/* __lint */
390
391/*
392 * return the current frame pointer
393 */
394
395#if defined(__lint)
396
397greg_t
398getfp(void)
399{ return (0); }
400
401#else	/* __lint */
402
403#if defined(__amd64)
404
405	ENTRY(getfp)
406	movq	%rbp, %rax
407	ret
408	SET_SIZE(getfp)
409
410#elif defined(__i386)
411
412	ENTRY(getfp)
413	movl	%ebp, %eax
414	ret
415	SET_SIZE(getfp)
416
417#endif	/* __i386 */
418#endif	/* __lint */
419
420/*
421 * Invalidate a single page table entry in the TLB
422 */
423
424#if defined(__lint)
425
426/* ARGSUSED */
427void
428mmu_tlbflush_entry(caddr_t m)
429{}
430
431#else	/* __lint */
432
433#if defined(__amd64)
434
435	ENTRY(mmu_tlbflush_entry)
436	invlpg	(%rdi)
437	ret
438	SET_SIZE(mmu_tlbflush_entry)
439
440#elif defined(__i386)
441
442	ENTRY(mmu_tlbflush_entry)
443	movl	4(%esp), %eax
444	invlpg	(%eax)
445	ret
446	SET_SIZE(mmu_tlbflush_entry)
447
448#endif	/* __i386 */
449#endif	/* __lint */
450
451
452/*
453 * Get/Set the value of various control registers
454 */
455
456#if defined(__lint)
457
458ulong_t
459getcr0(void)
460{ return (0); }
461
462/* ARGSUSED */
463void
464setcr0(ulong_t value)
465{}
466
467ulong_t
468getcr2(void)
469{ return (0); }
470
471ulong_t
472getcr3(void)
473{ return (0); }
474
475#if !defined(__xpv)
476/* ARGSUSED */
477void
478setcr3(ulong_t val)
479{}
480
481void
482reload_cr3(void)
483{}
484#endif
485
486ulong_t
487getcr4(void)
488{ return (0); }
489
490/* ARGSUSED */
491void
492setcr4(ulong_t val)
493{}
494
495#if defined(__amd64)
496
497ulong_t
498getcr8(void)
499{ return (0); }
500
501/* ARGSUSED */
502void
503setcr8(ulong_t val)
504{}
505
506#endif	/* __amd64 */
507
508#else	/* __lint */
509
510#if defined(__amd64)
511
512	ENTRY(getcr0)
513	movq	%cr0, %rax
514	ret
515	SET_SIZE(getcr0)
516
517	ENTRY(setcr0)
518	movq	%rdi, %cr0
519	ret
520	SET_SIZE(setcr0)
521
522        ENTRY(getcr2)
523#if defined(__xpv)
524	movq	%gs:CPU_VCPU_INFO, %rax
525	movq	VCPU_INFO_ARCH_CR2(%rax), %rax
526#else
527        movq    %cr2, %rax
528#endif
529        ret
530	SET_SIZE(getcr2)
531
532	ENTRY(getcr3)
533	movq    %cr3, %rax
534	ret
535	SET_SIZE(getcr3)
536
537#if !defined(__xpv)
538
539        ENTRY(setcr3)
540        movq    %rdi, %cr3
541        ret
542	SET_SIZE(setcr3)
543
544	ENTRY(reload_cr3)
545	movq	%cr3, %rdi
546	movq	%rdi, %cr3
547	ret
548	SET_SIZE(reload_cr3)
549
550#endif	/* __xpv */
551
552	ENTRY(getcr4)
553	movq	%cr4, %rax
554	ret
555	SET_SIZE(getcr4)
556
557	ENTRY(setcr4)
558	movq	%rdi, %cr4
559	ret
560	SET_SIZE(setcr4)
561
562	ENTRY(getcr8)
563	movq	%cr8, %rax
564	ret
565	SET_SIZE(getcr8)
566
567	ENTRY(setcr8)
568	movq	%rdi, %cr8
569	ret
570	SET_SIZE(setcr8)
571
572#elif defined(__i386)
573
574        ENTRY(getcr0)
575        movl    %cr0, %eax
576        ret
577	SET_SIZE(getcr0)
578
579        ENTRY(setcr0)
580        movl    4(%esp), %eax
581        movl    %eax, %cr0
582        ret
583	SET_SIZE(setcr0)
584
585        ENTRY(getcr2)
586#if defined(__xpv)
587	movl	%gs:CPU_VCPU_INFO, %eax
588	movl	VCPU_INFO_ARCH_CR2(%eax), %eax
589#else
590        movl    %cr2, %eax
591#endif
592        ret
593	SET_SIZE(getcr2)
594
595	ENTRY(getcr3)
596	movl    %cr3, %eax
597	ret
598	SET_SIZE(getcr3)
599
600#if !defined(__xpv)
601
602        ENTRY(setcr3)
603        movl    4(%esp), %eax
604        movl    %eax, %cr3
605        ret
606	SET_SIZE(setcr3)
607
608	ENTRY(reload_cr3)
609	movl    %cr3, %eax
610	movl    %eax, %cr3
611	ret
612	SET_SIZE(reload_cr3)
613
614#endif	/* __xpv */
615
616	ENTRY(getcr4)
617	movl    %cr4, %eax
618	ret
619	SET_SIZE(getcr4)
620
621        ENTRY(setcr4)
622        movl    4(%esp), %eax
623        movl    %eax, %cr4
624        ret
625	SET_SIZE(setcr4)
626
627#endif	/* __i386 */
628#endif	/* __lint */
629
630#if defined(__lint)
631
632/*ARGSUSED*/
633uint32_t
634__cpuid_insn(struct cpuid_regs *regs)
635{ return (0); }
636
637#else	/* __lint */
638
639#if defined(__amd64)
640
641	ENTRY(__cpuid_insn)
642	movq	%rbx, %r8
643	movq	%rcx, %r9
644	movq	%rdx, %r11
645	movl	(%rdi), %eax		/* %eax = regs->cp_eax */
646	movl	0x4(%rdi), %ebx		/* %ebx = regs->cp_ebx */
647	movl	0x8(%rdi), %ecx		/* %ecx = regs->cp_ecx */
648	movl	0xc(%rdi), %edx		/* %edx = regs->cp_edx */
649	cpuid
650	movl	%eax, (%rdi)		/* regs->cp_eax = %eax */
651	movl	%ebx, 0x4(%rdi)		/* regs->cp_ebx = %ebx */
652	movl	%ecx, 0x8(%rdi)		/* regs->cp_ecx = %ecx */
653	movl	%edx, 0xc(%rdi)		/* regs->cp_edx = %edx */
654	movq	%r8, %rbx
655	movq	%r9, %rcx
656	movq	%r11, %rdx
657	ret
658	SET_SIZE(__cpuid_insn)
659
660#elif defined(__i386)
661
662        ENTRY(__cpuid_insn)
663	pushl	%ebp
664	movl	0x8(%esp), %ebp		/* %ebp = regs */
665	pushl	%ebx
666	pushl	%ecx
667	pushl	%edx
668	movl	(%ebp), %eax		/* %eax = regs->cp_eax */
669	movl	0x4(%ebp), %ebx		/* %ebx = regs->cp_ebx */
670	movl	0x8(%ebp), %ecx		/* %ecx = regs->cp_ecx */
671	movl	0xc(%ebp), %edx		/* %edx = regs->cp_edx */
672	cpuid
673	movl	%eax, (%ebp)		/* regs->cp_eax = %eax */
674	movl	%ebx, 0x4(%ebp)		/* regs->cp_ebx = %ebx */
675	movl	%ecx, 0x8(%ebp)		/* regs->cp_ecx = %ecx */
676	movl	%edx, 0xc(%ebp)		/* regs->cp_edx = %edx */
677	popl	%edx
678	popl	%ecx
679	popl	%ebx
680	popl	%ebp
681	ret
682	SET_SIZE(__cpuid_insn)
683
684#endif	/* __i386 */
685#endif	/* __lint */
686
687#if defined(__xpv)
688	/*
689	 * Defined in C
690	 */
691#else
692
693#if defined(__lint)
694
695/*ARGSUSED*/
696void
697i86_monitor(volatile uint32_t *addr, uint32_t extensions, uint32_t hints)
698{ return; }
699
700#else   /* __lint */
701
702#if defined(__amd64)
703
704	ENTRY_NP(i86_monitor)
705	pushq	%rbp
706	movq	%rsp, %rbp
707	movq	%rdi, %rax		/* addr */
708	movq	%rsi, %rcx		/* extensions */
709	/* rdx contains input arg3: hints */
710	.byte	0x0f, 0x01, 0xc8	/* monitor */
711	leave
712	ret
713	SET_SIZE(i86_monitor)
714
715#elif defined(__i386)
716
717ENTRY_NP(i86_monitor)
718	pushl	%ebp
719	movl	%esp, %ebp
720	movl	0x8(%ebp),%eax		/* addr */
721	movl	0xc(%ebp),%ecx		/* extensions */
722	movl	0x10(%ebp),%edx		/* hints */
723	.byte	0x0f, 0x01, 0xc8	/* monitor */
724	leave
725	ret
726	SET_SIZE(i86_monitor)
727
728#endif	/* __i386 */
729#endif	/* __lint */
730
731#if defined(__lint)
732
733/*ARGSUSED*/
734void
735i86_mwait(uint32_t data, uint32_t extensions)
736{ return; }
737
738#else	/* __lint */
739
740#if defined(__amd64)
741
742	ENTRY_NP(i86_mwait)
743	pushq	%rbp
744	movq	%rsp, %rbp
745	movq	%rdi, %rax		/* data */
746	movq	%rsi, %rcx		/* extensions */
747	.byte	0x0f, 0x01, 0xc9	/* mwait */
748	leave
749	ret
750	SET_SIZE(i86_mwait)
751
752#elif defined(__i386)
753
754	ENTRY_NP(i86_mwait)
755	pushl	%ebp
756	movl	%esp, %ebp
757	movl	0x8(%ebp),%eax		/* data */
758	movl	0xc(%ebp),%ecx		/* extensions */
759	.byte	0x0f, 0x01, 0xc9	/* mwait */
760	leave
761	ret
762	SET_SIZE(i86_mwait)
763
764#endif	/* __i386 */
765#endif	/* __lint */
766
767#if defined(__lint)
768
769hrtime_t
770tsc_read(void)
771{
772	return (0);
773}
774
775#else	/* __lint */
776
777#if defined(__amd64)
778
779	ENTRY_NP(tsc_read)
780	movq	%rbx, %r11
781	movl	$0, %eax
782	cpuid
783	rdtsc
784	movq	%r11, %rbx
785	shlq	$32, %rdx
786	orq	%rdx, %rax
787	ret
788	.globl _tsc_mfence_start
789_tsc_mfence_start:
790	mfence
791	rdtsc
792	shlq	$32, %rdx
793	orq	%rdx, %rax
794	ret
795	.globl _tsc_mfence_end
796_tsc_mfence_end:
797	.globl _tscp_start
798_tscp_start:
799	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
800	shlq	$32, %rdx
801	orq	%rdx, %rax
802	ret
803	.globl _tscp_end
804_tscp_end:
805	.globl _no_rdtsc_start
806_no_rdtsc_start:
807	xorl	%edx, %edx
808	xorl	%eax, %eax
809	ret
810	.globl _no_rdtsc_end
811_no_rdtsc_end:
812	SET_SIZE(tsc_read)
813
814#else /* __i386 */
815
816	ENTRY_NP(tsc_read)
817	pushl	%ebx
818	movl	$0, %eax
819	cpuid
820	rdtsc
821	popl	%ebx
822	ret
823	.globl _tsc_mfence_start
824_tsc_mfence_start:
825	mfence
826	rdtsc
827	ret
828	.globl _tsc_mfence_end
829_tsc_mfence_end:
830	.globl	_tscp_start
831_tscp_start:
832	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
833	ret
834	.globl _tscp_end
835_tscp_end:
836	.globl _no_rdtsc_start
837_no_rdtsc_start:
838	xorl	%edx, %edx
839	xorl	%eax, %eax
840	ret
841	.globl _no_rdtsc_end
842_no_rdtsc_end:
843	SET_SIZE(tsc_read)
844
845#endif	/* __i386 */
846
847#endif	/* __lint */
848
849#endif	/* __xpv */
850
851/*
852 * Insert entryp after predp in a doubly linked list.
853 */
854
855#if defined(__lint)
856
857/*ARGSUSED*/
858void
859_insque(caddr_t entryp, caddr_t predp)
860{}
861
862#else	/* __lint */
863
864#if defined(__amd64)
865
866	ENTRY(_insque)
867	movq	(%rsi), %rax		/* predp->forw 			*/
868	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp		*/
869	movq	%rax, (%rdi)		/* entryp->forw = predp->forw	*/
870	movq	%rdi, (%rsi)		/* predp->forw = entryp		*/
871	movq	%rdi, CPTRSIZE(%rax)	/* predp->forw->back = entryp	*/
872	ret
873	SET_SIZE(_insque)
874
875#elif defined(__i386)
876
877	ENTRY(_insque)
878	movl	8(%esp), %edx
879	movl	4(%esp), %ecx
880	movl	(%edx), %eax		/* predp->forw			*/
881	movl	%edx, CPTRSIZE(%ecx)	/* entryp->back = predp		*/
882	movl	%eax, (%ecx)		/* entryp->forw = predp->forw	*/
883	movl	%ecx, (%edx)		/* predp->forw = entryp		*/
884	movl	%ecx, CPTRSIZE(%eax)	/* predp->forw->back = entryp	*/
885	ret
886	SET_SIZE(_insque)
887
888#endif	/* __i386 */
889#endif	/* __lint */
890
891/*
892 * Remove entryp from a doubly linked list
893 */
894
895#if defined(__lint)
896
897/*ARGSUSED*/
898void
899_remque(caddr_t entryp)
900{}
901
902#else	/* __lint */
903
904#if defined(__amd64)
905
906	ENTRY(_remque)
907	movq	(%rdi), %rax		/* entry->forw */
908	movq	CPTRSIZE(%rdi), %rdx	/* entry->back */
909	movq	%rax, (%rdx)		/* entry->back->forw = entry->forw */
910	movq	%rdx, CPTRSIZE(%rax)	/* entry->forw->back = entry->back */
911	ret
912	SET_SIZE(_remque)
913
914#elif defined(__i386)
915
916	ENTRY(_remque)
917	movl	4(%esp), %ecx
918	movl	(%ecx), %eax		/* entry->forw */
919	movl	CPTRSIZE(%ecx), %edx	/* entry->back */
920	movl	%eax, (%edx)		/* entry->back->forw = entry->forw */
921	movl	%edx, CPTRSIZE(%eax)	/* entry->forw->back = entry->back */
922	ret
923	SET_SIZE(_remque)
924
925#endif	/* __i386 */
926#endif	/* __lint */
927
928/*
929 * Returns the number of
930 * non-NULL bytes in string argument.
931 */
932
933#if defined(__lint)
934
935/* ARGSUSED */
936size_t
937strlen(const char *str)
938{ return (0); }
939
940#else	/* __lint */
941
942#if defined(__amd64)
943
944/*
945 * This is close to a simple transliteration of a C version of this
946 * routine.  We should either just -make- this be a C version, or
947 * justify having it in assembler by making it significantly faster.
948 *
949 * size_t
950 * strlen(const char *s)
951 * {
952 *	const char *s0;
953 * #if defined(DEBUG)
954 *	if ((uintptr_t)s < KERNELBASE)
955 *		panic(.str_panic_msg);
956 * #endif
957 *	for (s0 = s; *s; s++)
958 *		;
959 *	return (s - s0);
960 * }
961 */
962
963	ENTRY(strlen)
964#ifdef DEBUG
965	movq	postbootkernelbase(%rip), %rax
966	cmpq	%rax, %rdi
967	jae	str_valid
968	pushq	%rbp
969	movq	%rsp, %rbp
970	leaq	.str_panic_msg(%rip), %rdi
971	xorl	%eax, %eax
972	call	panic
973#endif	/* DEBUG */
974str_valid:
975	cmpb	$0, (%rdi)
976	movq	%rdi, %rax
977	je	.null_found
978	.align	4
979.strlen_loop:
980	incq	%rdi
981	cmpb	$0, (%rdi)
982	jne	.strlen_loop
983.null_found:
984	subq	%rax, %rdi
985	movq	%rdi, %rax
986	ret
987	SET_SIZE(strlen)
988
989#elif defined(__i386)
990
991	ENTRY(strlen)
992#ifdef DEBUG
993	movl	postbootkernelbase, %eax
994	cmpl	%eax, 4(%esp)
995	jae	str_valid
996	pushl	%ebp
997	movl	%esp, %ebp
998	pushl	$.str_panic_msg
999	call	panic
1000#endif /* DEBUG */
1001
1002str_valid:
1003	movl	4(%esp), %eax		/* %eax = string address */
1004	testl	$3, %eax		/* if %eax not word aligned */
1005	jnz	.not_word_aligned	/* goto .not_word_aligned */
1006	.align	4
1007.word_aligned:
1008	movl	(%eax), %edx		/* move 1 word from (%eax) to %edx */
1009	movl	$0x7f7f7f7f, %ecx
1010	andl	%edx, %ecx		/* %ecx = %edx & 0x7f7f7f7f */
1011	addl	$4, %eax		/* next word */
1012	addl	$0x7f7f7f7f, %ecx	/* %ecx += 0x7f7f7f7f */
1013	orl	%edx, %ecx		/* %ecx |= %edx */
1014	andl	$0x80808080, %ecx	/* %ecx &= 0x80808080 */
1015	cmpl	$0x80808080, %ecx	/* if no null byte in this word */
1016	je	.word_aligned		/* goto .word_aligned */
1017	subl	$4, %eax		/* post-incremented */
1018.not_word_aligned:
1019	cmpb	$0, (%eax)		/* if a byte in (%eax) is null */
1020	je	.null_found		/* goto .null_found */
1021	incl	%eax			/* next byte */
1022	testl	$3, %eax		/* if %eax not word aligned */
1023	jnz	.not_word_aligned	/* goto .not_word_aligned */
1024	jmp	.word_aligned		/* goto .word_aligned */
1025	.align	4
1026.null_found:
1027	subl	4(%esp), %eax		/* %eax -= string address */
1028	ret
1029	SET_SIZE(strlen)
1030
1031#endif	/* __i386 */
1032
1033#ifdef DEBUG
1034	.text
1035.str_panic_msg:
1036	.string "strlen: argument below kernelbase"
1037#endif /* DEBUG */
1038
1039#endif	/* __lint */
1040
1041	/*
	 * Berkeley 4.3 introduced symbolically named interrupt levels
	 * as a way to deal with priority in a machine independent fashion.
	 * Numbered priorities are machine specific, and should be
	 * discouraged where possible.
1046	 *
	 * Note, for the machine specific priorities there are
	 * examples listed for devices that use a particular priority.
	 * It should not be construed that all devices of that
	 * type should be at that priority.  It is currently where
	 * the current devices fit into the priority scheme based
	 * upon time criticalness.
1053	 *
	 * The underlying assumption of these assignments is that
	 * IPL 10 is the highest level from which a device
	 * routine can call wakeup.  Devices that interrupt from higher
	 * levels are restricted in what they can do.  If they need
	 * kernel services they should schedule a routine at a lower
	 * level (via software interrupt) to do the required
	 * processing.
1061	 *
1062	 * Examples of this higher usage:
1063	 *	Level	Usage
1064	 *	14	Profiling clock (and PROM uart polling clock)
1065	 *	12	Serial ports
1066	 *
1067	 * The serial ports request lower level processing on level 6.
1068	 *
1069	 * Also, almost all splN routines (where N is a number or a
1070	 * mnemonic) will do a RAISE(), on the assumption that they are
1071	 * never used to lower our priority.
1072	 * The exceptions are:
1073	 *	spl8()		Because you can't be above 15 to begin with!
1074	 *	splzs()		Because this is used at boot time to lower our
1075	 *			priority, to allow the PROM to poll the uart.
1076	 *	spl0()		Used to lower priority to 0.
1077	 */
1078
1079#if defined(__lint)
1080
1081int spl0(void)		{ return (0); }
1082int spl6(void)		{ return (0); }
1083int spl7(void)		{ return (0); }
1084int spl8(void)		{ return (0); }
1085int splhigh(void)	{ return (0); }
1086int splhi(void)		{ return (0); }
1087int splzs(void)		{ return (0); }
1088
1089/* ARGSUSED */
1090void
1091splx(int level)
1092{}
1093
1094#else	/* __lint */
1095
1096#if defined(__amd64)
1097
1098#define	SETPRI(level) \
1099	movl	$/**/level, %edi;	/* new priority */		\
1100	jmp	do_splx			/* redirect to do_splx */
1101
1102#define	RAISE(level) \
1103	movl	$/**/level, %edi;	/* new priority */		\
1104	jmp	splr			/* redirect to splr */
1105
1106#elif defined(__i386)
1107
1108#define	SETPRI(level) \
1109	pushl	$/**/level;	/* new priority */			\
1110	call	do_splx;	/* invoke common splx code */		\
1111	addl	$4, %esp;	/* unstack arg */			\
1112	ret
1113
1114#define	RAISE(level) \
1115	pushl	$/**/level;	/* new priority */			\
1116	call	splr;		/* invoke common splr code */		\
1117	addl	$4, %esp;	/* unstack args */			\
1118	ret
1119
1120#endif	/* __i386 */
1121
1122	/* locks out all interrupts, including memory errors */
1123	ENTRY(spl8)
1124	SETPRI(15)
1125	SET_SIZE(spl8)
1126
1127	/* just below the level that profiling runs */
1128	ENTRY(spl7)
1129	RAISE(13)
1130	SET_SIZE(spl7)
1131
1132	/* sun specific - highest priority onboard serial i/o asy ports */
1133	ENTRY(splzs)
1134	SETPRI(12)	/* Can't be a RAISE, as it's used to lower us */
1135	SET_SIZE(splzs)
1136
1137	ENTRY(splhi)
1138	ALTENTRY(splhigh)
1139	ALTENTRY(spl6)
1140	ALTENTRY(i_ddi_splhigh)
1141
1142	RAISE(DISP_LEVEL)
1143
1144	SET_SIZE(i_ddi_splhigh)
1145	SET_SIZE(spl6)
1146	SET_SIZE(splhigh)
1147	SET_SIZE(splhi)
1148
1149	/* allow all interrupts */
1150	ENTRY(spl0)
1151	SETPRI(0)
1152	SET_SIZE(spl0)
1153
1154
	/* splx implementation */
1156	ENTRY(splx)
1157	jmp	do_splx		/* redirect to common splx code */
1158	SET_SIZE(splx)
1159
1160#endif	/* __lint */
1161
1162#if defined(__i386)
1163
1164/*
1165 * Read and write the %gs register
1166 */
1167
1168#if defined(__lint)
1169
1170/*ARGSUSED*/
1171uint16_t
1172getgs(void)
1173{ return (0); }
1174
1175/*ARGSUSED*/
1176void
1177setgs(uint16_t sel)
1178{}
1179
1180#else	/* __lint */
1181
1182	ENTRY(getgs)
1183	clr	%eax
1184	movw	%gs, %ax
1185	ret
1186	SET_SIZE(getgs)
1187
1188	ENTRY(setgs)
1189	movw	4(%esp), %gs
1190	ret
1191	SET_SIZE(setgs)
1192
1193#endif	/* __lint */
1194#endif	/* __i386 */
1195
1196#if defined(__lint)
1197
1198void
1199pc_reset(void)
1200{}
1201
1202void
1203efi_reset(void)
1204{}
1205
1206#else	/* __lint */
1207
1208	ENTRY(wait_500ms)
1209	push	%ebx
1210	movl	$50000, %ebx
12111:
1212	call	tenmicrosec
1213	decl	%ebx
1214	jnz	1b
1215	pop	%ebx
1216	ret
1217	SET_SIZE(wait_500ms)
1218
1219#define	RESET_METHOD_KBC	1
1220#define	RESET_METHOD_PORT92	2
1221#define RESET_METHOD_PCI	4
1222
1223	DGDEF3(pc_reset_methods, 4, 8)
1224	.long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI;
1225
1226	ENTRY(pc_reset)
1227
1228#if defined(__i386)
1229	testl	$RESET_METHOD_KBC, pc_reset_methods
1230#elif defined(__amd64)
1231	testl	$RESET_METHOD_KBC, pc_reset_methods(%rip)
1232#endif
1233	jz	1f
1234
1235	/
1236	/ Try the classic keyboard controller-triggered reset.
1237	/
1238	movw	$0x64, %dx
1239	movb	$0xfe, %al
1240	outb	(%dx)
1241
1242	/ Wait up to 500 milliseconds here for the keyboard controller
1243	/ to pull the reset line.  On some systems where the keyboard
1244	/ controller is slow to pull the reset line, the next reset method
1245	/ may be executed (which may be bad if those systems hang when the
1246	/ next reset method is used, e.g. Ferrari 3400 (doesn't like port 92),
1247	/ and Ferrari 4000 (doesn't like the cf9 reset method))
1248
1249	call	wait_500ms
1250
12511:
1252#if defined(__i386)
1253	testl	$RESET_METHOD_PORT92, pc_reset_methods
1254#elif defined(__amd64)
1255	testl	$RESET_METHOD_PORT92, pc_reset_methods(%rip)
1256#endif
1257	jz	3f
1258
1259	/
1260	/ Try port 0x92 fast reset
1261	/
1262	movw	$0x92, %dx
1263	inb	(%dx)
1264	cmpb	$0xff, %al	/ If port's not there, we should get back 0xFF
1265	je	1f
1266	testb	$1, %al		/ If bit 0
1267	jz	2f		/ is clear, jump to perform the reset
1268	andb	$0xfe, %al	/ otherwise,
1269	outb	(%dx)		/ clear bit 0 first, then
12702:
1271	orb	$1, %al		/ Set bit 0
1272	outb	(%dx)		/ and reset the system
12731:
1274
1275	call	wait_500ms
1276
12773:
1278#if defined(__i386)
1279	testl	$RESET_METHOD_PCI, pc_reset_methods
1280#elif defined(__amd64)
1281	testl	$RESET_METHOD_PCI, pc_reset_methods(%rip)
1282#endif
1283	jz	4f
1284
1285	/ Try the PCI (soft) reset vector (should work on all modern systems,
1286	/ but has been shown to cause problems on 450NX systems, and some newer
1287	/ systems (e.g. ATI IXP400-equipped systems))
1288	/ When resetting via this method, 2 writes are required.  The first
1289	/ targets bit 1 (0=hard reset without power cycle, 1=hard reset with
1290	/ power cycle).
1291	/ The reset occurs on the second write, during bit 2's transition from
1292	/ 0->1.
1293	movw	$0xcf9, %dx
1294	movb	$0x2, %al	/ Reset mode = hard, no power cycle
1295	outb	(%dx)
1296	movb	$0x6, %al
1297	outb	(%dx)
1298
1299	call	wait_500ms
1300
13014:
1302	/
1303	/ port 0xcf9 failed also.  Last-ditch effort is to
1304	/ triple-fault the CPU.
1305	/ Also, use triple fault for EFI firmware
1306	/
1307	ENTRY(efi_reset)
1308#if defined(__amd64)
1309	pushq	$0x0
1310	pushq	$0x0		/ IDT base of 0, limit of 0 + 2 unused bytes
1311	lidt	(%rsp)
1312#elif defined(__i386)
1313	pushl	$0x0
1314	pushl	$0x0		/ IDT base of 0, limit of 0 + 2 unused bytes
1315	lidt	(%esp)
1316#endif
1317	int	$0x0		/ Trigger interrupt, generate triple-fault
1318
1319	cli
1320	hlt			/ Wait forever
1321	/*NOTREACHED*/
1322	SET_SIZE(efi_reset)
1323	SET_SIZE(pc_reset)
1324
1325#endif	/* __lint */
1326
1327/*
1328 * C callable in and out routines
1329 */
1330
1331#if defined(__lint)
1332
1333/* ARGSUSED */
1334void
1335outl(int port_address, uint32_t val)
1336{}
1337
1338#else	/* __lint */
1339
1340#if defined(__amd64)
1341
1342	ENTRY(outl)
1343	movw	%di, %dx
1344	movl	%esi, %eax
1345	outl	(%dx)
1346	ret
1347	SET_SIZE(outl)
1348
1349#elif defined(__i386)
1350
1351	.set	PORT, 4
1352	.set	VAL, 8
1353
1354	ENTRY(outl)
1355	movw	PORT(%esp), %dx
1356	movl	VAL(%esp), %eax
1357	outl	(%dx)
1358	ret
1359	SET_SIZE(outl)
1360
1361#endif	/* __i386 */
1362#endif	/* __lint */
1363
1364#if defined(__lint)
1365
1366/* ARGSUSED */
1367void
1368outw(int port_address, uint16_t val)
1369{}
1370
1371#else	/* __lint */
1372
1373#if defined(__amd64)
1374
1375	ENTRY(outw)
1376	movw	%di, %dx
1377	movw	%si, %ax
1378	D16 outl (%dx)		/* XX64 why not outw? */
1379	ret
1380	SET_SIZE(outw)
1381
1382#elif defined(__i386)
1383
1384	ENTRY(outw)
1385	movw	PORT(%esp), %dx
1386	movw	VAL(%esp), %ax
1387	D16 outl (%dx)
1388	ret
1389	SET_SIZE(outw)
1390
1391#endif	/* __i386 */
1392#endif	/* __lint */
1393
#if defined(__lint)

/* ARGSUSED */
void
outb(int port_address, uint8_t val)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void outb(int port_address, uint8_t val)
	 * In: %edi = port_address, %sil = val.
	 */
	ENTRY(outb)
	movb	%sil, %al		/* %al = val */
	movl	%edi, %edx		/* %dx = port (low 16 bits used) */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#elif defined(__i386)

	/*
	 * void outb(int port_address, uint8_t val)
	 * Arguments are read from the stack at PORT and VAL.
	 */
	ENTRY(outb)
	movb	VAL(%esp), %al		/* %al = val */
	movl	PORT(%esp), %edx	/* %dx = port (low 16 bits used) */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#endif	/* __i386 */
#endif	/* __lint */
1423
#if defined(__lint)

/* ARGSUSED */
uint32_t
inl(int port_address)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint32_t inl(int port_address)
	 * In: %edi = port_address.  Out: %eax = 32-bit value read.
	 */
	ENTRY(inl)
	movl	%edi, %edx		/* %dx = port (low 16 bits used) */
	xorl	%eax, %eax
	inl	(%dx)
	ret
	SET_SIZE(inl)

#elif defined(__i386)

	/*
	 * uint32_t inl(int port_address)
	 * The port is read from the stack at PORT; result in %eax.
	 */
	ENTRY(inl)
	movl	PORT(%esp), %edx	/* %dx = port (low 16 bits used) */
	inl	(%dx)
	ret
	SET_SIZE(inl)

#endif	/* __i386 */
#endif	/* __lint */
1452
#if defined(__lint)

/* ARGSUSED */
uint16_t
inw(int port_address)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint16_t inw(int port_address)
	 * In: %edi = port_address.  Out: %ax = value read; the rest of
	 * %eax is cleared beforehand.
	 */
	ENTRY(inw)
	movl	%edi, %edx		/* %dx = port (low 16 bits used) */
	xorl	%eax, %eax		/* clear the bits the read won't set */
	D16 inl	(%dx)			/* 16-bit read via operand-size prefix */
	ret
	SET_SIZE(inw)

#elif defined(__i386)

	/*
	 * uint16_t inw(int port_address)
	 * The port is read from the stack at PORT; result in %ax with
	 * the rest of %eax cleared beforehand.
	 */
	ENTRY(inw)
	movl	PORT(%esp), %edx	/* %dx = port (low 16 bits used) */
	xorl	%eax, %eax		/* clear the bits the read won't set */
	D16 inl	(%dx)
	ret
	SET_SIZE(inw)

#endif	/* __i386 */
#endif	/* __lint */
1482
1483
#if defined(__lint)

/* ARGSUSED */
uint8_t
inb(int port_address)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint8_t inb(int port_address)
	 * In: %edi = port_address.  Out: %al = value read; the rest of
	 * %eax is cleared beforehand.
	 */
	ENTRY(inb)
	movl	%edi, %edx		/* %dx = port (low 16 bits used) */
	xorl	%eax, %eax		/* clear the bits the read won't set */
	inb	(%dx)
	ret
	SET_SIZE(inb)

#elif defined(__i386)

	/*
	 * uint8_t inb(int port_address)
	 * The port is read from the stack at PORT; result in %al with
	 * the rest of %eax cleared beforehand.
	 */
	ENTRY(inb)
	movl	PORT(%esp), %edx	/* %dx = port (low 16 bits used) */
	xorl	%eax, %eax		/* clear the bits the read won't set */
	inb	(%dx)
	ret
	SET_SIZE(inb)

#endif	/* __i386 */
#endif	/* __lint */
1513
1514
#if defined(__lint)

/* ARGSUSED */
void
repoutsw(int port, uint16_t *addr, int cnt)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void repoutsw(int port, uint16_t *addr, int cnt)
	 * In: %edi = port, %rsi = addr, %edx = cnt.
	 * %rsi already holds the source pointer that outs reads from.
	 */
	ENTRY(repoutsw)
	movl	%edx, %ecx		/* %ecx = cnt; must precede %dx load */
	movl	%edi, %edx		/* %dx = port (low 16 bits used) */
	rep
	  D16 outsl			/* 16-bit outs via operand-size prefix */
	ret
	SET_SIZE(repoutsw)

#elif defined(__i386)

	/*
	 * Stack layout once one register has been pushed:
	 *
	 *      |  cnt  |  +16
	 *      | *addr |  +12
	 *      | port  |  +8
	 *      |  eip  |  +4
	 *      |  esi  |  <-- %esp
	 *
	 * The constants below encode that layout; adjust them if any
	 * further values are pushed.
	 */
	.set	PORT, 8
	.set	ADDR, 12
	.set	COUNT, 16

	ENTRY(repoutsw)
	pushl	%esi			/* outs uses %esi as its source */
	movl	ADDR(%esp), %esi
	movl	COUNT(%esp), %ecx
	movl	PORT(%esp), %edx
	rep
	  D16 outsl
	popl	%esi
	ret
	SET_SIZE(repoutsw)

#endif	/* __i386 */
#endif	/* __lint */
1564
1565
#if defined(__lint)

/* ARGSUSED */
void
repinsw(int port_addr, uint16_t *addr, int cnt)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void repinsw(int port_addr, uint16_t *addr, int cnt)
	 * In: %edi = port_addr, %rsi = addr, %edx = cnt.
	 *
	 * The ins instruction stores through %rdi, so the buffer address
	 * must be moved there from %rsi once the port has been copied out
	 * of %di.  This matches what repinsb and repinsd do.
	 */
	ENTRY(repinsw)
	movl	%edx, %ecx		/* %ecx = cnt; must precede %dx load */
	movw	%di, %dx		/* %dx = port */
	movq	%rsi, %rdi		/* %rdi = addr (ins destination) */
	rep
	  D16 insl			/* 16-bit ins via operand-size prefix */
	ret
	SET_SIZE(repinsw)

#elif defined(__i386)

	/*
	 * void repinsw(int port_addr, uint16_t *addr, int cnt)
	 * Uses the PORT/ADDR/COUNT stack offsets, which assume exactly
	 * one register has been pushed.
	 */
	ENTRY(repinsw)
	pushl	%edi			/* ins uses %edi as its destination */
	movl	PORT(%esp), %edx
	movl	ADDR(%esp), %edi
	movl	COUNT(%esp), %ecx
	rep
	  D16 insl
	popl	%edi
	ret
	SET_SIZE(repinsw)

#endif	/* __i386 */
#endif	/* __lint */
1600
1601
#if defined(__lint)

/* ARGSUSED */
void
repinsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void repinsb(int port, uint8_t *addr, int count)
	 * In: %edi = port, %rsi = addr, %edx = count.
	 */
	ENTRY(repinsb)
	movl	%edx, %ecx		/* %ecx = count; must precede %dx load */
	movl	%edi, %edx		/* %dx = port (low 16 bits used) */
	movq	%rsi, %rdi		/* %rdi = addr (ins destination) */
	rep
	  insb
	ret
	SET_SIZE(repinsb)

#elif defined(__i386)

	/*
	 * Stack layout once one register has been pushed:
	 *
	 *      |  cnt  |  +16
	 *      | *addr |  +12
	 *      | port  |  +8
	 *      |  eip  |  +4
	 *      |  reg  |  <-- %esp
	 *
	 * The constants below encode that layout; adjust them if any
	 * further values are pushed.
	 */
	.set	IO_PORT, 8
	.set	IO_ADDR, 12
	.set	IO_COUNT, 16

	ENTRY(repinsb)
	pushl	%edi			/* ins uses %edi as its destination */
	movl	IO_PORT(%esp), %edx
	movl	IO_COUNT(%esp), %ecx
	movl	IO_ADDR(%esp), %edi
	rep
	  insb
	popl	%edi
	ret
	SET_SIZE(repinsb)

#endif	/* __i386 */
#endif	/* __lint */
1652
1653
1654/*
1655 * Input a stream of 32-bit words.
1656 * NOTE: count is a DWORD count.
1657 */
#if defined(__lint)

/* ARGSUSED */
void
repinsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void repinsd(int port, uint32_t *addr, int count)
	 * In: %edi = port, %rsi = addr, %edx = count (in dwords).
	 */
	ENTRY(repinsd)
	movl	%edx, %ecx		/* %ecx = count; must precede %dx load */
	movl	%edi, %edx		/* %dx = port (low 16 bits used) */
	movq	%rsi, %rdi		/* %rdi = addr (ins destination) */
	rep
	  insl
	ret
	SET_SIZE(repinsd)

#elif defined(__i386)

	/*
	 * void repinsd(int port, uint32_t *addr, int count)
	 * Uses the IO_* stack offsets, which assume one pushed register.
	 */
	ENTRY(repinsd)
	pushl	%edi			/* ins uses %edi as its destination */
	movl	IO_PORT(%esp), %edx
	movl	IO_COUNT(%esp), %ecx
	movl	IO_ADDR(%esp), %edi
	rep
	  insl
	popl	%edi
	ret
	SET_SIZE(repinsd)

#endif	/* __i386 */
#endif	/* __lint */
1693
1694/*
1695 * Output a stream of bytes
1696 * NOTE: count is a byte count
1697 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void repoutsb(int port, uint8_t *addr, int count)
	 * In: %edi = port, %rsi = addr, %edx = count (in bytes).
	 * %rsi already holds the source pointer that outs reads from.
	 */
	ENTRY(repoutsb)
	movl	%edx, %ecx		/* %ecx = count; must precede %dx load */
	movl	%edi, %edx		/* %dx = port (low 16 bits used) */
	rep
	  outsb
	ret
	SET_SIZE(repoutsb)

#elif defined(__i386)

	/*
	 * void repoutsb(int port, uint8_t *addr, int count)
	 * Uses the IO_* stack offsets, which assume one pushed register.
	 */
	ENTRY(repoutsb)
	pushl	%esi			/* outs uses %esi as its source */
	movl	IO_PORT(%esp), %edx
	movl	IO_COUNT(%esp), %ecx
	movl	IO_ADDR(%esp), %esi
	rep
	  outsb
	popl	%esi
	ret
	SET_SIZE(repoutsb)

#endif	/* __i386 */
#endif	/* __lint */
1732
1733/*
1734 * Output a stream of 32-bit words
1735 * NOTE: count is a DWORD count
1736 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void repoutsd(int port, uint32_t *addr, int count)
	 * In: %edi = port, %rsi = addr, %edx = count (in dwords).
	 * %rsi already holds the source pointer that outs reads from.
	 */
	ENTRY(repoutsd)
	movl	%edx, %ecx		/* %ecx = count; must precede %dx load */
	movl	%edi, %edx		/* %dx = port (low 16 bits used) */
	rep
	  outsl
	ret
	SET_SIZE(repoutsd)

#elif defined(__i386)

	/*
	 * void repoutsd(int port, uint32_t *addr, int count)
	 * Uses the IO_* stack offsets, which assume one pushed register.
	 */
	ENTRY(repoutsd)
	pushl	%esi			/* outs uses %esi as its source */
	movl	IO_PORT(%esp), %edx
	movl	IO_COUNT(%esp), %ecx
	movl	IO_ADDR(%esp), %esi
	rep
	  outsl
	popl	%esi
	ret
	SET_SIZE(repoutsd)

#endif	/* __i386 */
#endif	/* __lint */
1771
1772/*
1773 * void int3(void)
1774 * void int18(void)
1775 * void int20(void)
1776 */
1777
#if defined(__lint)

void
int3(void)
{}

void
int18(void)
{}

void
int20(void)
{}

#else	/* __lint */

	/* Raise software interrupt T_BPTFLT and return. */
	ENTRY(int3)
	int	$T_BPTFLT
	ret
	SET_SIZE(int3)

	/* Raise software interrupt T_MCE and return. */
	ENTRY(int18)
	int	$T_MCE
	ret
	SET_SIZE(int18)

	/*
	 * Raise software interrupt T_DBGENTR, but only when the RB_DEBUG
	 * bit is set in boothowto; otherwise return immediately.
	 */
	ENTRY(int20)
	testl	$RB_DEBUG, boothowto
	jz	1f

	int	$T_DBGENTR
1:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(int20)

#endif	/* __lint */
1816
#if defined(__lint)

/* ARGSUSED */
int
scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
	 *
	 * Walk cp[0..size) and stop at the first byte c for which
	 * (table[c] & mask) != 0.  Returns the number of bytes left
	 * unscanned, counting the stopping byte (0 if none matched).
	 *
	 * In: %rdi = size, %rsi = cp, %rdx = table, %cl = mask.
	 */
	ENTRY(scanc)
	addq	%rsi, %rdi		/* %rdi = end = &cp[size] */
.scanc_next:
	cmpq	%rdi, %rsi		/* reached the end? */
	jae	.scanc_out
	movzbq	(%rsi), %r8		/* %r8 = *cp */
	testb	%cl, (%r8, %rdx)	/* table[*cp] & mask */
	jnz	.scanc_out		/* hit: leave cp on this byte */
	incq	%rsi			/* miss: cp++ */
	jmp	.scanc_next
.scanc_out:
	subq	%rsi, %rdi
	movl	%edi, %eax		/* return (end - cp) */
	ret
	SET_SIZE(scanc)

#elif defined(__i386)

	/*
	 * int scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
	 *
	 * Same contract as above.  With two registers pushed the
	 * arguments sit at 12 (size), 16 (cp), 20 (table), 24 (mask).
	 */
	ENTRY(scanc)
	pushl	%edi
	pushl	%esi
	movl	16(%esp), %esi		/* %esi = cp */
	movl	12(%esp), %edi
	addl	%esi, %edi		/* %edi = end = &cp[size] */
	movl	20(%esp), %edx		/* %edx = table */
	movb	24(%esp), %cl		/* %cl = mask */
.scanc_next:
	cmpl	%edi, %esi		/* reached the end? */
	jae	.scanc_out
	movzbl	(%esi), %eax		/* %eax = *cp */
	testb	%cl, (%edx, %eax)	/* table[*cp] & mask */
	jnz	.scanc_out		/* hit: leave cp on this byte */
	incl	%esi			/* miss: cp++ */
	jmp	.scanc_next
.scanc_out:
	movl	%edi, %eax
	subl	%esi, %eax		/* return (end - cp) */
	popl	%esi
	popl	%edi
	ret
	SET_SIZE(scanc)

#endif	/* __i386 */
#endif	/* __lint */
1877
1878/*
1879 * Replacement functions for ones that are normally inlined.
1880 * In addition to the copy in i86.il, they are defined here just in case.
1881 */
1882
#if defined(__lint)

ulong_t
intr_clear(void)
{ return (0); }

ulong_t
clear_int_flag(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * ulong_t intr_clear(void) / ulong_t clear_int_flag(void)
	 *
	 * Capture the flags register in %rax, then disable interrupts.
	 * Under __xpv (unless xpv_panicking is non-zero) the PS_IE bit
	 * of the returned value is rebuilt from the event mask that
	 * CLIRET hands back.
	 */
	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfq
	popq	%rax			/* %rax = flags on entry */
#if defined(__xpv)
	leaq	xpv_panicking, %rdi
	movl	(%rdi), %edi
	testl	%edi, %edi
	jnz	2f			/* panicking: use the plain CLI path */
	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
	/*
	 * PS_IE in the result is set only when the old event mask
	 * bit was clear.
	 */
	andq    $_BITNOT(PS_IE), %rax
	testb	$1, %dl
	jnz	1f
	orq	$PS_IE, %rax
1:
	ret
2:
#endif
	CLI(%rdi)
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

#elif defined(__i386)

	/*
	 * ulong_t intr_clear(void) / ulong_t clear_int_flag(void)
	 * 32-bit variant of the routine above; result in %eax.
	 */
	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfl
	popl	%eax			/* %eax = flags on entry */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	testl	%edx, %edx
	jnz	2f			/* panicking: use the plain CLI path */
	CLIRET(%edx, %cl)	/* returns event mask in %cl */
	/*
	 * PS_IE in the result is set only when the old event mask
	 * bit was clear.
	 */
	andl    $_BITNOT(PS_IE), %eax
	testb	$1, %cl
	jnz	1f
	orl	$PS_IE, %eax
1:
	ret
2:
#endif
	CLI(%edx)
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

#endif	/* __i386 */
#endif	/* __lint */
1953
#if defined(__lint)

struct cpu *
curcpup(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * struct cpu *curcpup(void)
	 * Returns the pointer stored at %gs:CPU_SELF.
	 */
	ENTRY(curcpup)
	movq	%gs:CPU_SELF, %rax
	ret
	SET_SIZE(curcpup)

#elif defined(__i386)

	/*
	 * struct cpu *curcpup(void)
	 * Returns the pointer stored at %gs:CPU_SELF.
	 */
	ENTRY(curcpup)
	movl	%gs:CPU_SELF, %eax
	ret
	SET_SIZE(curcpup)

#endif	/* __i386 */
#endif	/* __lint */
1978
#if defined(__lint)

/* ARGSUSED */
uint32_t
htonl(uint32_t i)
{ return (0); }

/* ARGSUSED */
uint32_t
ntohl(uint32_t i)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint32_t htonl(uint32_t i) / uint32_t ntohl(uint32_t i)
	 * Byte-reverse the 32-bit argument; both names share one body.
	 */
	/* XX64 there must be shorter sequences for this */
	ENTRY(htonl)
	ALTENTRY(ntohl)
	bswap	%edi			/* reverse in place (arg reg is scratch) */
	movl	%edi, %eax
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

#elif defined(__i386)

	/*
	 * uint32_t htonl(uint32_t i) / uint32_t ntohl(uint32_t i)
	 * Byte-reverse the 32-bit stack argument.
	 */
	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	4(%esp), %eax		/* %eax = i */
	bswap	%eax
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

#endif	/* __i386 */
#endif	/* __lint */
2016
#if defined(__lint)

/* ARGSUSED */
uint16_t
htons(uint16_t i)
{ return (0); }

/* ARGSUSED */
uint16_t
ntohs(uint16_t i)
{ return (0); }


#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint16_t htons(uint16_t i) / uint16_t ntohs(uint16_t i)
	 * Swap the two low-order bytes of the argument; the upper half
	 * of %eax is returned as zero.
	 */
	ENTRY(htons)
	ALTENTRY(ntohs)
	movzwl	%di, %eax		/* %eax = low 16 bits of i */
	rolw	$8, %ax			/* exchange the two bytes */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#elif defined(__i386)

	/*
	 * uint16_t htons(uint16_t i) / uint16_t ntohs(uint16_t i)
	 * Swap the two low-order bytes of the stack argument; the upper
	 * half of %eax is returned as zero.
	 */
	ENTRY(htons)
	ALTENTRY(ntohs)
	movzwl	4(%esp), %eax		/* %eax = low 16 bits of i */
	rolw	$8, %ax			/* exchange the two bytes */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#endif	/* __i386 */
#endif	/* __lint */
2057
2058
#if defined(__lint)

/* ARGSUSED */
void
intr_restore(ulong_t i)
{ return; }

/* ARGSUSED */
void
restore_int_flag(ulong_t i)
{ return; }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void intr_restore(ulong_t i) / void restore_int_flag(ulong_t i)
	 *
	 * Re-enable interrupts only if PS_IE is set in the saved flags
	 * value i (%rdi); otherwise do nothing.
	 */
	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	testq	$PS_IE, %rdi
	jz	1f			/* PS_IE clear: nothing to restore */
#if defined(__xpv)
	leaq	xpv_panicking, %rsi
	movl	(%rsi), %esi
	testl	%esi, %esi
	jnz	1f			/* panicking: leave state alone */
	/*
	 * We are really running unprivileged, so a direct change to
	 * the IF bit would be ignored; the virtual IF bit is the one
	 * manipulated by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%rsi, %rdi)
#else
	sti
#endif
1:
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

#elif defined(__i386)

	/*
	 * void intr_restore(ulong_t i) / void restore_int_flag(ulong_t i)
	 * 32-bit variant; i is tested in place on the stack.
	 */
	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	testl	$PS_IE, 4(%esp)
	jz	1f			/* PS_IE clear: nothing to restore */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	testl	%edx, %edx
	jnz	1f			/* panicking: leave state alone */
	/*
	 * We are really running unprivileged, so a direct change to
	 * the IF bit would be ignored; the virtual IF bit is the one
	 * manipulated by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%edx, 4(%esp))
#else
	sti
#endif
1:
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

#endif	/* __i386 */
#endif	/* __lint */
2125
#if defined(__lint)

void
sti(void)
{}

void
cli(void)
{}

#else	/* __lint */

	/* void sti(void): expand the STI macro and return. */
	ENTRY(sti)
	STI
	ret
	SET_SIZE(sti)

	/*
	 * void cli(void): expand the CLI macro and return.  The macro
	 * is handed a scratch register of the native width.
	 */
	ENTRY(cli)
#if defined(__amd64)
	CLI(%rax)
#elif defined(__i386)
	CLI(%eax)
#endif	/* __i386 */
	ret
	SET_SIZE(cli)

#endif	/* __lint */
2153
#if defined(__lint)

dtrace_icookie_t
dtrace_interrupt_disable(void)
{ return (0); }

#else   /* __lint */

#if defined(__amd64)

	/*
	 * dtrace_icookie_t dtrace_interrupt_disable(void)
	 *
	 * Return the current flags register in %rax and disable
	 * interrupts.  Under __xpv nothing is disabled when
	 * xpv_panicking is non-zero; otherwise PS_IE in the result is
	 * rebuilt from the event mask returned by CLIRET.
	 */
	ENTRY(dtrace_interrupt_disable)
	pushfq
	popq	%rax			/* %rax = flags on entry */
#if defined(__xpv)
	leaq	xpv_panicking, %rdi
	movl	(%rdi), %edi
	testl	%edi, %edi
	jnz	1f
	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
	/*
	 * PS_IE in the result is set only when the old event mask
	 * bit was clear.
	 */
	andq    $_BITNOT(PS_IE), %rax
	testb	$1, %dl
	jnz	1f
	orq	$PS_IE, %rax
1:
#else
	CLI(%rdx)
#endif
	ret
	SET_SIZE(dtrace_interrupt_disable)

#elif defined(__i386)

	/*
	 * dtrace_icookie_t dtrace_interrupt_disable(void)
	 * 32-bit variant of the routine above; result in %eax.
	 */
	ENTRY(dtrace_interrupt_disable)
	pushfl
	popl	%eax			/* %eax = flags on entry */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	testl	%edx, %edx
	jnz	1f
	CLIRET(%edx, %cl)	/* returns event mask in %cl */
	/*
	 * PS_IE in the result is set only when the old event mask
	 * bit was clear.
	 */
	andl    $_BITNOT(PS_IE), %eax
	testb	$1, %cl
	jnz	1f
	orl	$PS_IE, %eax
1:
#else
	CLI(%edx)
#endif
	ret
	SET_SIZE(dtrace_interrupt_disable)

#endif	/* __i386 */
#endif	/* __lint */
2214
#if defined(__lint)

/*ARGSUSED*/
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
	 *
	 * Load the flags register from cookie (%rdi).  Under __xpv,
	 * unless xpv_panicking is non-zero, the event mask is also
	 * updated from the cookie's PS_IE bit via IE_TO_EVENT_MASK.
	 *
	 * The "jne 1f" needs a target inside this function; without
	 * the explicit 1: label below it would bind to whatever 1:
	 * happens to follow later in the file.
	 */
	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi
	popfq
#if defined(__xpv)
	leaq	xpv_panicking, %rdx
	movl	(%rdx), %edx
	cmpl	$0, %edx
	jne	1f			/* panicking: skip event mask update */
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored. The
	 * virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%rdx, %rdi)
1:
#endif
	ret
	SET_SIZE(dtrace_interrupt_enable)

#elif defined(__i386)

	/*
	 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
	 * 32-bit variant; the cookie is read from the stack into %eax.
	 */
	ENTRY(dtrace_interrupt_enable)
	movl	4(%esp), %eax
	pushl	%eax
	popfl
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	cmpl	$0, %edx
	jne	1f			/* panicking: skip event mask update */
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored. The
	 * virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%edx, %eax)
1:
#endif
	ret
	SET_SIZE(dtrace_interrupt_enable)

#endif	/* __i386 */
#endif	/* __lint */
2267
2268
/*
 * Guarded by __lint, the macro used by this block's own #else/#endif
 * comments and by every other lint stub in this file.
 */
#if defined(__lint)

void
dtrace_membar_producer(void)
{}

void
dtrace_membar_consumer(void)
{}

#else	/* __lint */

	/* void dtrace_membar_producer(void): returns immediately. */
	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_producer)

	/* void dtrace_membar_consumer(void): returns immediately. */
	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_consumer)

#endif	/* __lint */
2292
#if defined(__lint)

kthread_id_t
threadp(void)
{ return ((kthread_id_t)0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * kthread_id_t threadp(void)
	 * Returns the value stored at %gs:CPU_THREAD.
	 */
	ENTRY(threadp)
	movq	%gs:CPU_THREAD, %rax
	ret
	SET_SIZE(threadp)

#elif defined(__i386)

	/*
	 * kthread_id_t threadp(void)
	 * Returns the value stored at %gs:CPU_THREAD.
	 */
	ENTRY(threadp)
	movl	%gs:CPU_THREAD, %eax
	ret
	SET_SIZE(threadp)

#endif	/* __i386 */
#endif	/* __lint */
2317
2318/*
2319 *   Checksum routine for Internet Protocol Headers
2320 */
2321
#if defined(__lint)

/* ARGSUSED */
unsigned int
ip_ocsum(
	ushort_t *address,	/* ptr to 1st message buffer */
	int halfword_count,	/* length of data */
	unsigned int sum)	/* partial checksum */
{
	unsigned int	psum = 0;	/* partial sum */

	/* add up every 16-bit word of the buffer */
	while (halfword_count-- > 0) {
		psum += *address++;
	}

	/* fold the carries back into the low 16 bits */
	while ((psum >> 16) != 0) {
		psum = (psum >> 16) + (psum & 0xffff);
	}

	/* merge the caller's partial checksum and fold again */
	psum += sum;

	while ((psum >> 16) != 0) {
		psum = (psum >> 16) + (psum & 0xffff);
	}

	return (psum);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * unsigned int ip_ocsum(ushort_t *address, int halfword_count,
	 *     unsigned int sum)
	 *
	 * In:  %rdi = address, %esi = halfword_count, %edx = sum.
	 * Out: %eax = 16-bit folded checksum.
	 *
	 * Working registers: %rsi = data cursor, %ecx = halfwords left,
	 * %edx and %eax = two running 32-bit sums linked by the carry
	 * flag through the adcl chain.
	 *
	 * The main loop consumes 64 bytes (32 halfwords) per pass.  A
	 * final partial pass biases %rsi backwards and enters the same
	 * adcl chain part-way through via .ip_ocsum_jmptbl.
	 */
	ENTRY(ip_ocsum)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	cmpq	postbootkernelbase(%rip), %rdi
	jnb	1f
	xorl	%eax, %eax
	movq	%rdi, %rsi
	leaq	.ip_ocsum_panic_msg(%rip), %rdi
	call	panic
	/*NOTREACHED*/
.ip_ocsum_panic_msg:
	.string	"ip_ocsum: address 0x%p below kernelbase\n"
1:
#endif
	movl	%esi, %ecx	/* %ecx = halfword_count */
	movq	%rdi, %rsi	/* %rsi = address */
				/* partial sum is already in %edx */
	xorl	%eax, %eax
	testl	%ecx, %ecx
	jz	.ip_ocsum_done
	testq	$3, %rsi
	jnz	.ip_csum_notaligned
.ip_csum_aligned:	/* XX64 opportunities for 8-byte operations? */
.ip_ocsum_loop:
	/* XX64 opportunities for prefetch? */
	/* XX64 compute csum with 64 bit quantities? */
	subl	$32, %ecx
	jl	.ip_ocsum_tail	/* fewer than 32 halfwords remain */

	addl	0(%rsi), %edx
.only60:
	adcl	4(%rsi), %eax
.only56:
	adcl	8(%rsi), %edx
.only52:
	adcl	12(%rsi), %eax
.only48:
	adcl	16(%rsi), %edx
.only44:
	adcl	20(%rsi), %eax
.only40:
	adcl	24(%rsi), %edx
.only36:
	adcl	28(%rsi), %eax
.only32:
	adcl	32(%rsi), %edx
.only28:
	adcl	36(%rsi), %eax
.only24:
	adcl	40(%rsi), %edx
.only20:
	adcl	44(%rsi), %eax
.only16:
	adcl	48(%rsi), %edx
.only12:
	adcl	52(%rsi), %eax
.only8:
	adcl	56(%rsi), %edx
.only4:
	adcl	60(%rsi), %eax	/* could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* could be adding -1 in eax with a carry */
	adcl	$0, %eax

	addq	$64, %rsi	/* step past the 64 bytes just summed */
	testl	%ecx, %ecx
	jnz	.ip_ocsum_loop

.ip_ocsum_done:
	addl	%eax, %edx	/* combine the two running sums */
	adcl	$0, %edx
	movl	%edx, %eax	/* form a 16 bit checksum by */
	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
	addw	%dx, %ax
	adcw	$0, %ax
	andl	$0xffff, %eax
	leave
	ret

.ip_csum_notaligned:
	/* add one leading halfword, then rejoin the main path */
	movzwl	(%rsi), %edi
	addl	%edi, %edx
	adcl	$0, %edx
	addq	$2, %rsi
	decl	%ecx
	jmp	.ip_csum_aligned

.ip_ocsum_tail:
	addl	$32, %ecx	/* undo the subtraction: %ecx = remainder */
	testl	$1, %ecx
	jz	.ip_ocsum_dispatch
	/* odd remainder: add the last halfword separately */
	andl	$0xfe, %ecx
	movzwl	(%rsi, %rcx, 2), %edi
	addl	%edi, %edx
	adcl	$0, %edx
.ip_ocsum_dispatch:
	movl	%ecx, %edi
	shrl	$1, %ecx	/* %ecx = dwords left = jump table index */
	shl	$1, %edi	/* %edi = bytes left */
	subq	$64, %rdi
	addq	%rdi, %rsi	/* bias %rsi so the chain's tail offsets fit */
	leaq    .ip_ocsum_jmptbl(%rip), %rdi
	leaq	(%rdi, %rcx, 8), %rdi
	xorl	%ecx, %ecx	/* zero count ends the loop after this pass */
	clc
	jmp 	*(%rdi)

	.align	8
.ip_ocsum_jmptbl:
	.quad	.only0, .only4, .only8, .only12, .only16, .only20
	.quad	.only24, .only28, .only32, .only36, .only40, .only44
	.quad	.only48, .only52, .only56, .only60
	SET_SIZE(ip_ocsum)

#elif defined(__i386)

	/*
	 * unsigned int ip_ocsum(ushort_t *address, int halfword_count,
	 *     unsigned int sum)
	 *
	 * 32-bit variant of the routine above.  Arguments are read via
	 * %ebp: 8 = address, 12 = halfword_count, 16 = sum.
	 * Working registers: %esi = data cursor, %ecx = halfwords left,
	 * %edx and %eax = the two running sums.
	 */
	ENTRY(ip_ocsum)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	8(%ebp), %esi	/* address */
	movl	12(%ebp), %ecx	/* count of half words */
	movl	16(%ebp), %edx	/* partial checksum */
	xorl	%eax, %eax
	testl	%ecx, %ecx
	jz	.ip_ocsum_done

	testl	$3, %esi
	jnz	.ip_csum_notaligned
.ip_csum_aligned:
.ip_ocsum_loop:
	subl	$32, %ecx
	jl	.ip_ocsum_tail	/* fewer than 32 halfwords remain */

	addl	0(%esi), %edx
.only60:
	adcl	4(%esi), %eax
.only56:
	adcl	8(%esi), %edx
.only52:
	adcl	12(%esi), %eax
.only48:
	adcl	16(%esi), %edx
.only44:
	adcl	20(%esi), %eax
.only40:
	adcl	24(%esi), %edx
.only36:
	adcl	28(%esi), %eax
.only32:
	adcl	32(%esi), %edx
.only28:
	adcl	36(%esi), %eax
.only24:
	adcl	40(%esi), %edx
.only20:
	adcl	44(%esi), %eax
.only16:
	adcl	48(%esi), %edx
.only12:
	adcl	52(%esi), %eax
.only8:
	adcl	56(%esi), %edx
.only4:
	adcl	60(%esi), %eax	/* We could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* we could be adding -1 in eax with a carry */
	adcl	$0, %eax

	addl	$64, %esi	/* step past the 64 bytes just summed */
	testl	%ecx, %ecx
	jnz	.ip_ocsum_loop

.ip_ocsum_done:
	addl	%eax, %edx	/* combine the two running sums */
	adcl	$0, %edx
	movl	%edx, %eax	/* form a 16 bit checksum by */
	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
	addw	%dx, %ax
	adcw	$0, %ax
	andl	$0xffff, %eax
	popl	%edi		/* restore registers */
	popl	%esi
	popl	%ebx
	leave
	ret

.ip_csum_notaligned:
	/* add one leading halfword, then rejoin the main path */
	movzwl	(%esi), %edi
	addl	%edi, %edx
	adcl	$0, %edx
	addl	$2, %esi
	decl	%ecx
	jmp	.ip_csum_aligned

.ip_ocsum_tail:
	addl	$32, %ecx	/* undo the subtraction: %ecx = remainder */
	testl	$1, %ecx
	jz	.ip_ocsum_dispatch
	/* odd remainder: add the last halfword separately */
	andl	$0xfe, %ecx
	movzwl	(%esi, %ecx, 2), %edi
	addl	%edi, %edx
	adcl	$0, %edx
.ip_ocsum_dispatch:
	movl	%ecx, %edi
	shrl	$1, %ecx	/* %ecx = dwords left = jump table index */
	shl	$1, %edi	/* %edi = bytes left */
	subl	$64, %edi
	addl	%edi, %esi	/* bias %esi so the chain's tail offsets fit */
	movl	$.ip_ocsum_jmptbl, %edi
	lea	(%edi, %ecx, 4), %edi
	xorl	%ecx, %ecx	/* zero count ends the loop after this pass */
	clc
	jmp 	*(%edi)
	SET_SIZE(ip_ocsum)

	.data
	.align	4

.ip_ocsum_jmptbl:
	.long	.only0, .only4, .only8, .only12, .only16, .only20
	.long	.only24, .only28, .only32, .only36, .only40, .only44
	.long	.only48, .only52, .only56, .only60


#endif	/* __i386 */
#endif	/* __lint */
2588
2589/*
2590 * multiply two long numbers and yield a u_longlong_t result, callable from C.
2591 * Provided to manipulate hrtime_t values.
2592 */
#if defined(__lint)

/* result = a * b; */

/* ARGSUSED */
unsigned long long
mul32(uint_t a, uint_t b)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * unsigned long long mul32(uint_t a, uint_t b)
	 * In: %edi = a, %esi = b.  Out: %rax = full 64-bit product.
	 *
	 * Both operands are zero-extended to 64 bits, so the low 64
	 * bits of the 64x64 multiply are the exact unsigned product.
	 */
	ENTRY(mul32)
	movl	%edi, %eax		/* %rax = (uint64_t)a */
	movl	%esi, %ecx		/* %rcx = (uint64_t)b */
	imulq	%rcx, %rax
	ret
	SET_SIZE(mul32)

#elif defined(__i386)

	/*
	 * unsigned long long mul32(uint_t a, uint_t b)
	 * Out: %edx:%eax = full 64-bit product of the two stack args.
	 */
	ENTRY(mul32)
	movl	4(%esp), %eax		/* %eax = a */
	mull	8(%esp)			/* %edx:%eax = a * b */
	ret
	SET_SIZE(mul32)

#endif	/* __i386 */
#endif	/* __lint */
2626
#if defined(notused)
#if defined(__lint)
/* ARGSUSED */
void
load_pte64(uint64_t *pte, uint64_t pte_value)
{}
#else	/* __lint */
	/*
	 * void load_pte64(uint64_t *pte, uint64_t pte_value)
	 * Stores the 64-bit value as two 32-bit writes: the high word
	 * first, then the low word.
	 */
	.globl load_pte64
load_pte64:
	movl	12(%esp), %edx		/* %edx = high word of pte_value */
	movl	8(%esp), %ecx		/* %ecx = low word of pte_value */
	movl	4(%esp), %eax		/* %eax = pte */
	movl	%edx, 4(%eax)
	movl	%ecx, (%eax)
	ret
#endif	/* __lint */
#endif	/* notused */
2644
#if defined(__lint)

/*ARGSUSED*/
void
scan_memory(caddr_t addr, size_t size)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void scan_memory(caddr_t addr, size_t size)
	 * In: %rdi = addr, %rsi = size in bytes.
	 *
	 * Reads size / 8 quadwords starting at addr with rep lodsq and
	 * discards them; any trailing bytes beyond the last whole
	 * quadword are not touched.
	 */
	ENTRY(scan_memory)
	movq	%rsi, %rcx
	shrq	$3, %rcx	/* %rcx = number of whole quadwords */
	jz	.scanm_done
	movq	%rdi, %rsi	/* lodsq reads through %rsi */
	rep lodsq		/* scan the memory range */
.scanm_done:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(scan_memory)

#elif defined(__i386)

	/*
	 * void scan_memory(caddr_t addr, size_t size)
	 *
	 * Reads size / 4 dwords starting at addr with rep lodsl and
	 * discards them.  With two registers pushed, addr is at
	 * 12(%esp) and size at 16(%esp).
	 */
	ENTRY(scan_memory)
	pushl	%ecx
	pushl	%esi
	movl	16(%esp), %ecx	/* %ecx = size in bytes */
	shrl	$2, %ecx	/* %ecx = number of whole dwords */
	jz	.scanm_done
	movl	12(%esp), %esi	/* lodsl reads through %esi */
	.byte	0xf3		/* rep prefix.  lame assembler.  sigh. */
	lodsl
.scanm_done:
	popl	%esi
	popl	%ecx
	ret
	SET_SIZE(scan_memory)

#endif	/* __i386 */
#endif	/* __lint */
2686
2687
#if defined(__lint)

/*ARGSUSED */
int
lowbit(ulong_t i)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int lowbit(ulong_t i)
	 *
	 * Returns the 1-based index of the lowest set bit of i, or 0
	 * when i is zero.
	 *
	 * BSF leaves its destination undefined when the source is zero,
	 * so the zero case is decided from ZF rather than by relying on
	 * a preloaded destination surviving the instruction.
	 */
	ENTRY(lowbit)
	movl	$-1, %eax		/* result - 1 for i == 0 */
	bsfq	%rdi, %rdi		/* ZF = (i == 0); else %rdi = index */
	cmovnz	%edi, %eax		/* take the index only if i != 0 */
	incl	%eax
	ret
	SET_SIZE(lowbit)

#elif defined(__i386)

	/*
	 * int lowbit(ulong_t i)
	 * 32-bit variant; the zero case is handled by an explicit
	 * branch on ZF.
	 */
	ENTRY(lowbit)
	bsfl	4(%esp), %eax		/* ZF = (i == 0); else %eax = index */
	jz	0f
	incl	%eax
	ret
0:
	xorl	%eax, %eax		/* i == 0: return 0 */
	ret
	SET_SIZE(lowbit)

#endif	/* __i386 */
#endif	/* __lint */
2717
#if defined(__lint)

/*ARGSUSED*/
int
highbit(ulong_t i)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int highbit(ulong_t i)
	 *
	 * Returns the 1-based index of the highest set bit of i, or 0
	 * when i is zero.
	 *
	 * BSR leaves its destination undefined when the source is zero,
	 * so the zero case is decided from ZF rather than by relying on
	 * a preloaded destination surviving the instruction.
	 */
	ENTRY(highbit)
	movl	$-1, %eax		/* result - 1 for i == 0 */
	bsrq	%rdi, %rdi		/* ZF = (i == 0); else %rdi = index */
	cmovnz	%edi, %eax		/* take the index only if i != 0 */
	incl	%eax
	ret
	SET_SIZE(highbit)

#elif defined(__i386)

	/*
	 * int highbit(ulong_t i)
	 * 32-bit variant; the zero case is handled by an explicit
	 * branch on ZF.
	 */
	ENTRY(highbit)
	bsrl	4(%esp), %eax		/* ZF = (i == 0); else %eax = index */
	jz	0f
	incl	%eax
	ret
0:
	xorl	%eax, %eax		/* i == 0: return 0 */
	ret
	SET_SIZE(highbit)

#endif	/* __i386 */
#endif	/* __lint */
2747
#if defined(__lint)

/*ARGSUSED*/
uint64_t
rdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
wrmsr(uint_t r, const uint64_t val)
{}

/*ARGSUSED*/
uint64_t
xrdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
xwrmsr(uint_t r, const uint64_t val)
{}

void
invalidate_cache(void)
{}

#else  /* __lint */

/* Value loaded into %edi by the xrdmsr/xwrmsr variants below. */
#define	XMSR_ACCESS_VAL		$0x9c5a203a

#if defined(__amd64)

	/*
	 * uint64_t rdmsr(uint_t r)
	 * In: %edi = r.  Out: %rax = (%edx << 32) | %eax as read.
	 */
	ENTRY(rdmsr)
	movl	%edi, %ecx		/* %ecx selects the MSR */
	rdmsr
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	SET_SIZE(rdmsr)

	/*
	 * void wrmsr(uint_t r, const uint64_t val)
	 * In: %edi = r, %rsi = val, split into %edx:%eax for wrmsr.
	 */
	ENTRY(wrmsr)
	movl	%edi, %ecx		/* %ecx selects the MSR */
	movl	%esi, %eax		/* low half of val */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* high half of val */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	/*
	 * uint64_t xrdmsr(uint_t r)
	 * As rdmsr, with XMSR_ACCESS_VAL placed in %edi first.
	 */
	ENTRY(xrdmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* save r before %edi is reused */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr
	shlq	$32, %rdx
	orq	%rdx, %rax
	leave
	ret
	SET_SIZE(xrdmsr)

	/*
	 * void xwrmsr(uint_t r, const uint64_t val)
	 * As wrmsr, with XMSR_ACCESS_VAL placed in %edi first.
	 */
	ENTRY(xwrmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* save r before %edi is reused */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	movl	%esi, %eax		/* low half of val */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* high half of val */
	wrmsr
	leave
	ret
	SET_SIZE(xwrmsr)

#elif defined(__i386)

	/*
	 * uint64_t rdmsr(uint_t r)
	 * The %edx:%eax pair left by rdmsr is the 64-bit return value.
	 */
	ENTRY(rdmsr)
	movl	4(%esp), %ecx		/* %ecx = r */
	rdmsr
	ret
	SET_SIZE(rdmsr)

	/*
	 * void wrmsr(uint_t r, const uint64_t val)
	 * val occupies two stack slots: low at 8, high at 12.
	 */
	ENTRY(wrmsr)
	movl	12(%esp), %edx		/* high half of val */
	movl	8(%esp), %eax		/* low half of val */
	movl	4(%esp), %ecx		/* %ecx = r */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	/*
	 * uint64_t xrdmsr(uint_t r)
	 * As rdmsr, with XMSR_ACCESS_VAL placed in %edi (preserved
	 * around the access).
	 */
	ENTRY(xrdmsr)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	movl	8(%ebp), %ecx		/* %ecx = r */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr
	popl	%edi
	leave
	ret
	SET_SIZE(xrdmsr)

	/*
	 * void xwrmsr(uint_t r, const uint64_t val)
	 * As wrmsr, with XMSR_ACCESS_VAL placed in %edi (preserved
	 * around the access).
	 */
	ENTRY(xwrmsr)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	movl	8(%ebp), %ecx		/* %ecx = r */
	movl	12(%ebp), %eax		/* low half of val */
	movl	16(%ebp), %edx		/* high half of val */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	wrmsr
	popl	%edi
	leave
	ret
	SET_SIZE(xwrmsr)

#endif	/* __i386 */

	/* void invalidate_cache(void): execute wbinvd and return. */
	ENTRY(invalidate_cache)
	wbinvd
	ret
	SET_SIZE(invalidate_cache)

#endif	/* __lint */
2872
#if defined(__lint)

/*ARGSUSED*/
void
getcregs(struct cregs *crp)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void getcregs(struct cregs *crp)
	 * In: %rdi = crp.  Fills *crp with a snapshot of the control
	 * registers (and, on bare metal, descriptor-table registers
	 * and two MSRs).
	 */
	ENTRY_NP(getcregs)
#if defined(__xpv)
	/*
	 * Most hardware control registers and descriptor tables are
	 * not directly accessible here, so start from a zeroed
	 * structure.
	 *
	 * XXPV	Perhaps it would be helpful for the hypervisor to return
	 *	virtualized versions of these for post-mortem use.
	 *	(Need to reevaluate - perhaps it already does!)
	 */
	pushq	%rdi		/* keep crp across the call */
	movq	$CREGSZ, %rsi
	call	bzero
	popq	%rdi

	/*
	 * Record the few registers that can be read.
	 */
	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
	movq	%cr4, %rax
	movq	%rax, CREG_CR4(%rdi)	/* cr4 */

#else	/* __xpv */

/* Read MSR r and store its two 32-bit halves at off(d) and off+4(d). */
#define	GETMSR(r, off, d)	\
	movl	$r, %ecx;	\
	rdmsr;			\
	movl	%eax, off(d);	\
	movl	%edx, off+4(d)

	/*
	 * Each descriptor slot is pre-zeroed where the store
	 * instruction writes fewer bytes than the slot holds.
	 */
	xorl	%eax, %eax
	movq	%rax, CREG_GDT+8(%rdi)
	sgdt	CREG_GDT(%rdi)		/* 10 bytes */
	movq	%rax, CREG_IDT+8(%rdi)
	sidt	CREG_IDT(%rdi)		/* 10 bytes */
	movq	%rax, CREG_LDT(%rdi)
	sldt	CREG_LDT(%rdi)		/* 2 bytes */
	movq	%rax, CREG_TASKR(%rdi)
	str	CREG_TASKR(%rdi)	/* 2 bytes */
	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
	movq	%cr4, %rax
	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
	movq	%cr8, %rax
	movq	%rax, CREG_CR8(%rdi)	/* cr8 */
	GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
	GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
#endif	/* __xpv */
	ret
	SET_SIZE(getcregs)

#undef GETMSR

#elif defined(__i386)

	/*
	 * void getcregs(struct cregs *crp)
	 * 32-bit variant; crp is read from the stack into %edx.
	 */
	ENTRY_NP(getcregs)
#if defined(__xpv)
	/*
	 * Most hardware control registers and descriptor tables are
	 * not directly accessible here, so start from a zeroed
	 * structure.
	 *
	 * XXPV	Perhaps it would be helpful for the hypervisor to return
	 *	virtualized versions of these for post-mortem use.
	 *	(Need to reevaluate - perhaps it already does!)
	 */
	pushl	$CREGSZ
	pushl	8(%esp)		/* crp: one slot deeper after the push */
	call	bzero
	addl	$8, %esp
	movl	4(%esp), %edx	/* %edx = crp */

	/*
	 * Record the few registers that can be read.
	 */
	movl	%cr0, %eax
	movl	%eax, CREG_CR0(%edx)	/* cr0 */
	movl	%cr2, %eax
	movl	%eax, CREG_CR2(%edx)	/* cr2 */
	movl	%cr3, %eax
	movl	%eax, CREG_CR3(%edx)	/* cr3 */
	movl	%cr4, %eax
	movl	%eax, CREG_CR4(%edx)	/* cr4 */

#else	/* __xpv */

	movl	4(%esp), %edx		/* %edx = crp */
	movw	$0, CREG_GDT+6(%edx)
	movw	$0, CREG_IDT+6(%edx)
	sgdt	CREG_GDT(%edx)		/* gdt */
	sidt	CREG_IDT(%edx)		/* idt */
	sldt	CREG_LDT(%edx)		/* ldt */
	str	CREG_TASKR(%edx)	/* task */
	movl	%cr0, %eax
	movl	%eax, CREG_CR0(%edx)	/* cr0 */
	movl	%cr2, %eax
	movl	%eax, CREG_CR2(%edx)	/* cr2 */
	movl	%cr3, %eax
	movl	%eax, CREG_CR3(%edx)	/* cr3 */
	/*
	 * %cr4 is read only when X86_LARGEPAGE is set in x86_feature;
	 * otherwise zero is recorded in its place.
	 */
	xorl	%eax, %eax		/* default: cr4 slot = 0 */
	testl	$X86_LARGEPAGE, x86_feature
	jz	1f
	movl	%cr4, %eax
1:
	movl	%eax, CREG_CR4(%edx)	/* cr4 */
#endif
	ret
	SET_SIZE(getcregs)

#endif	/* __i386 */
#endif	/* __lint */
3006
3007
3008/*
3009 * A panic trigger is a word which is updated atomically and can only be set
3010 * once.  We atomically store 0xDEFACEDD and load the old value.  If the
3011 * previous value was 0, we succeed and return 1; otherwise return 0.
3012 * This allows a partially corrupt trigger to still trigger correctly.  DTrace
3013 * has its own version of this function to allow it to panic correctly from
3014 * probe context.
3015 */
3016#if defined(__lint)
3017
3018/*ARGSUSED*/
int
panic_trigger(int *tp)
{
	/* lint-only stub; the real routine is the assembly version below */
	return (0);
}
3022
3023/*ARGSUSED*/
int
dtrace_panic_trigger(int *tp)
{
	/* lint-only stub; the real routine is the assembly version below */
	return (0);
}
3027
3028#else	/* __lint */
3029
3030#if defined(__amd64)
3031
/*
 * int panic_trigger(int *tp)				(amd64)
 *
 * Atomically exchanges 0xdefacedd with *tp.  Returns 1 when the value
 * previously stored there was 0, and 0 otherwise.
 *
 * In:	%rdi = tp
 * Out:	%eax = 1 or 0; %edx = previous contents of *tp
 */
	ENTRY_NP(panic_trigger)
	movl	$0xdefacedd, %edx
	lock
	  xchgl	%edx, (%rdi)		/* %edx = old *tp, *tp = 0xdefacedd */
	testl	%edx, %edx		/* ZF set iff the old value was 0 */
	movl	$0, %eax		/* mov leaves ZF untouched */
	sete	%al			/* %eax = (old value == 0) */
	ret
	SET_SIZE(panic_trigger)
3044
/*
 * int dtrace_panic_trigger(int *tp)			(amd64)
 *
 * Separate copy of the trigger routine for the DTrace panic path:
 * atomically exchanges 0xdefacedd with *tp and returns 1 when the value
 * previously stored there was 0, and 0 otherwise.
 *
 * In:	%rdi = tp
 * Out:	%eax = 1 or 0; %edx = previous contents of *tp
 */
	ENTRY_NP(dtrace_panic_trigger)
	movl	$0xdefacedd, %edx
	lock
	  xchgl	%edx, (%rdi)		/* %edx = old *tp, *tp = 0xdefacedd */
	testl	%edx, %edx		/* ZF set iff the old value was 0 */
	movl	$0, %eax		/* mov leaves ZF untouched */
	sete	%al			/* %eax = (old value == 0) */
	ret
	SET_SIZE(dtrace_panic_trigger)
3057
3058#elif defined(__i386)
3059
/*
 * int panic_trigger(int *tp)				(i386)
 *
 * Atomically exchanges 0xdefacedd with *tp.  Returns 1 when the value
 * previously stored there was 0, and 0 otherwise.  The argument is read
 * from 4(%esp); %edx is left holding tp.
 */
	ENTRY_NP(panic_trigger)
	movl	4(%esp), %edx		/ %edx = tp
	movl	$0xdefacedd, %eax
	lock
	xchgl	%eax, (%edx)		/ %eax = old value, trigger = 0xdefacedd
	testl	%eax, %eax		/ ZF set iff the old value was 0
	movl	$0, %eax		/ mov leaves ZF untouched
	sete	%al			/ %eax = (old value == 0)
	ret
	SET_SIZE(panic_trigger)
3072
/*
 * int dtrace_panic_trigger(int *tp)			(i386)
 *
 * Separate copy of the trigger routine for the DTrace panic path:
 * atomically exchanges 0xdefacedd with *tp and returns 1 when the value
 * previously stored there was 0, and 0 otherwise.  The argument is read
 * from 4(%esp); %edx is left holding tp.
 */
	ENTRY_NP(dtrace_panic_trigger)
	movl	4(%esp), %edx		/ %edx = tp
	movl	$0xdefacedd, %eax
	lock
	xchgl	%eax, (%edx)		/ %eax = old value, trigger = 0xdefacedd
	testl	%eax, %eax		/ ZF set iff the old value was 0
	movl	$0, %eax		/ mov leaves ZF untouched
	sete	%al			/ %eax = (old value == 0)
	ret
	SET_SIZE(dtrace_panic_trigger)
3085
3086#endif	/* __i386 */
3087#endif	/* __lint */
3088
3089/*
3090 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
3091 * into the panic code implemented in panicsys().  vpanic() is responsible
3092 * for passing through the format string and arguments, and constructing a
3093 * regs structure on the stack into which it saves the current register
3094 * values.  If we are not dying due to a fatal trap, these registers will
3095 * then be preserved in panicbuf as the current processor state.  Before
3096 * invoking panicsys(), vpanic() activates the first panic trigger (see
3097 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
3098 * DTrace takes a slightly different panic path if it must panic from probe
3099 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
3100 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
3101 * branches back into vpanic().
3102 */
3103#if defined(__lint)
3104
3105/*ARGSUSED*/
void
vpanic(const char *format, va_list alist)
{
	/* lint-only stub; the real routine is the assembly version below */
}
3109
3110/*ARGSUSED*/
void
dtrace_vpanic(const char *format, va_list alist)
{
	/* lint-only stub; the real routine is the assembly version below */
}
3114
3115#else	/* __lint */
3116
3117#if defined(__amd64)
3118
/*
 * void vpanic(const char *format, va_list alist)		(amd64)
 *
 * Pushes the entry-time registers, fires the panic_quiesce trigger, moves
 * to the top of panic_stack when the trigger reported success, builds a
 * struct regs on the stack and calls
 * panicsys(format, alist, rp, on_panic_stack).
 *
 * dtrace_vpanic() builds the same save area and jumps to vpanic_common
 * with its own trigger result in %eax.
 *
 * Save area, addressed through %rbx:
 *	0x60 return %rip	0x58 caller %rbp	0x50 %rflags
 *	0x48 %r11	0x40 %r10	0x38 %rbx	0x30 %rax
 *	0x28 %r9	0x20 %r8	0x18 %rcx	0x10 %rdx
 *	0x08 %rsi (alist)	0x00 %rdi (format)
 */
	ENTRY_NP(vpanic)

	pushq	%rbp
	movq	%rsp, %rbp
	pushfq
	pushq	%r11
	pushq	%r10
	pushq	%rbx
	pushq	%rax
	pushq	%r9
	pushq	%r8
	pushq	%rcx
	pushq	%rdx
	pushq	%rsi
	pushq	%rdi

	movq	%rsp, %rbx			/* %rbx = base of save area */

	leaq	panic_quiesce(%rip), %rdi
	call	panic_trigger			/* %eax = trigger result */

vpanic_common:
	/*
	 * %eax carries the trigger result, either from the call above or
	 * from dtrace_vpanic().  It is parked in %r11d until it is handed
	 * to panicsys() as the on_panic_stack argument.
	 */
	movl	%eax, %r11d
	testl	%r11d, %r11d
	jz	0f

	/*
	 * The trigger succeeded, so this is the first panic: continue on
	 * the reserved panic_stack, starting from its highest address.
	 */
	leaq	panic_stack(%rip), %rsp
	addq	$PANICSTKSIZE, %rsp
0:	subq	$REGSIZE, %rsp			/* room for struct regs */

	/*
	 * Copy the entry-time values out of the save area, in ascending
	 * save-area order, using %rcx as scratch.
	 */
	movq	0x0(%rbx), %rcx
	movq	%rcx, REGOFF_RDI(%rsp)
	movq	0x8(%rbx), %rcx
	movq	%rcx, REGOFF_RSI(%rsp)
	movq	0x10(%rbx), %rcx
	movq	%rcx, REGOFF_RDX(%rsp)
	movq	0x18(%rbx), %rcx
	movq	%rcx, REGOFF_RCX(%rsp)
	movq	0x20(%rbx), %rcx
	movq	%rcx, REGOFF_R8(%rsp)
	movq	0x28(%rbx), %rcx
	movq	%rcx, REGOFF_R9(%rsp)
	movq	0x30(%rbx), %rcx
	movq	%rcx, REGOFF_RAX(%rsp)
	movq	0x38(%rbx), %rcx
	movq	%rcx, REGOFF_RBX(%rsp)
	movq	0x40(%rbx), %rcx
	movq	%rcx, REGOFF_R10(%rsp)
	movq	0x48(%rbx), %rcx
	movq	%rcx, REGOFF_R11(%rsp)
	movq	0x50(%rbx), %rcx
	movq	%rcx, REGOFF_RFL(%rsp)
	movq	0x58(%rbx), %rcx
	movq	%rcx, REGOFF_RBP(%rsp)

	/* %r12-%r15 are stored straight from the live registers */
	movq	%r12, REGOFF_R12(%rsp)
	movq	%r13, REGOFF_R13(%rsp)
	movq	%r14, REGOFF_R14(%rsp)
	movq	%r15, REGOFF_R15(%rsp)

	/* recorded %rip is the address of vpanic itself */
	leaq	vpanic(%rip), %rcx
	movq	%rcx, REGOFF_RIP(%rsp)

	/* recorded %rsp is the address of the return-%rip slot (0x60) */
	leaq	0x60(%rbx), %rcx
	movq	%rcx, REGOFF_RSP(%rsp)

	/*
	 * Segment selectors: %rcx is cleared once and only %cx is written
	 * afterwards, so every stored value is the zero-extended selector.
	 */
	xorl	%ecx, %ecx
	movw	%ds, %cx
	movq	%rcx, REGOFF_DS(%rsp)
	movw	%es, %cx
	movq	%rcx, REGOFF_ES(%rsp)
	movw	%fs, %cx
	movq	%rcx, REGOFF_FS(%rsp)
	movw	%gs, %cx
	movq	%rcx, REGOFF_GS(%rsp)
	movw	%cs, %cx
	movq	%rcx, REGOFF_CS(%rsp)
	movw	%ss, %cx
	movq	%rcx, REGOFF_SS(%rsp)

	movq	$0, REGOFF_TRAPNO(%rsp)
	movq	$0, REGOFF_ERR(%rsp)

	/*
	 * panicsys(format, alist, rp, on_panic_stack)
	 */
	movq	REGOFF_RDI(%rsp), %rdi		/* format */
	movq	REGOFF_RSI(%rsp), %rsi		/* alist */
	movq	%rsp, %rdx			/* struct regs */
	movl	%r11d, %ecx			/* on_panic_stack */
	call	panicsys

	/*
	 * Unwind in the reverse order of the entry pushes.
	 * NOTE(review): when the stack was switched above, these pops read
	 * from the panic_stack region rather than from the save area; only
	 * the leave/ret pair relies on %rbp.  Presumably panicsys() is not
	 * expected to return on that path - confirm.
	 */
	addq	$REGSIZE, %rsp
	popq	%rdi
	popq	%rsi
	popq	%rdx
	popq	%rcx
	popq	%r8
	popq	%r9
	popq	%rax
	popq	%rbx
	popq	%r10
	popq	%r11
	popfq
	leave
	ret
	SET_SIZE(vpanic)
3243
/*
 * void dtrace_vpanic(const char *format, va_list alist)	(amd64)
 *
 * Builds the same register save area as vpanic(), calls
 * dtrace_panic_trigger(&panic_quiesce) and then joins vpanic() at
 * vpanic_common with the trigger result in %eax and the save-area base
 * in %rbx.
 *
 * Save area, addressed through %rbx:
 *	0x60 return %rip	0x58 caller %rbp	0x50 %rflags
 *	0x48 %r11	0x40 %r10	0x38 %rbx	0x30 %rax
 *	0x28 %r9	0x20 %r8	0x18 %rcx	0x10 %rdx
 *	0x08 %rsi (alist)	0x00 %rdi (format)
 */
	ENTRY_NP(dtrace_vpanic)

	pushq	%rbp
	movq	%rsp, %rbp
	pushfq
	pushq	%r11
	pushq	%r10
	pushq	%rbx
	pushq	%rax
	pushq	%r9
	pushq	%r8
	pushq	%rcx
	pushq	%rdx
	pushq	%rsi
	pushq	%rdi

	movq	%rsp, %rbx			/* %rbx = base of save area */

	leaq	panic_quiesce(%rip), %rdi
	call	dtrace_panic_trigger		/* %eax = trigger result */
	jmp	vpanic_common			/* continue in vpanic() */

	SET_SIZE(dtrace_vpanic)
3267
3268#elif defined(__i386)
3269
/*
 * void vpanic(const char *format, va_list alist)		(i386)
 *
 * Pushes the entry-time registers, fires the panic_quiesce trigger, moves
 * to the top of panic_stack when the trigger reported success, builds a
 * struct regs on the stack and calls
 * panicsys(format, alist, rp, on_panic_stack).
 *
 * dtrace_vpanic() builds the same save area and jumps to vpanic_common
 * with its own trigger result in %eax.
 *
 * Save area, addressed through %ebx:
 *	20 return %eip	16 caller %ebp	12 %eax	8 %ebx	4 %ecx	0 %edx
 */
	ENTRY_NP(vpanic)

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	pushl	%edx

	movl	%esp, %ebx			/ %ebx = base of save area

	lea	panic_quiesce, %eax
	pushl	%eax				/ argument: address of trigger
	call	panic_trigger			/ %eax = trigger result
	addl	$4, %esp			/ drop the argument

vpanic_common:
	testl	%eax, %eax			/ trigger result == 0 ?
	jz	0f				/   yes, stay on this stack

	/*
	 * The trigger succeeded, so this is the first panic: continue on
	 * the reserved panic_stack, starting from its highest address.
	 */
	lea	panic_stack, %esp
	addl	$PANICSTKSIZE, %esp

0:	subl	$REGSIZE, %esp			/ room for struct regs

	/*
	 * Fill in struct regs.  Segment registers, %edi and %esi come from
	 * the live registers; everything else comes from the save area.
	 */
#if !defined(__GNUC_AS__)
	movw	%gs, %edx
	movl	%edx, REGOFF_GS(%esp)
	movw	%fs, %edx
	movl	%edx, REGOFF_FS(%esp)
	movw	%es, %edx
	movl	%edx, REGOFF_ES(%esp)
	movw	%ds, %edx
	movl	%edx, REGOFF_DS(%esp)
#else	/* __GNUC_AS__ */
	mov	%gs, %edx
	mov	%edx, REGOFF_GS(%esp)
	mov	%fs, %edx
	mov	%edx, REGOFF_FS(%esp)
	mov	%es, %edx
	mov	%edx, REGOFF_ES(%esp)
	mov	%ds, %edx
	mov	%edx, REGOFF_DS(%esp)
#endif	/* __GNUC_AS__ */
	movl	%edi, REGOFF_EDI(%esp)
	movl	%esi, REGOFF_ESI(%esp)
	movl	0(%ebx), %ecx
	movl	%ecx, REGOFF_EDX(%esp)
	movl	4(%ebx), %ecx
	movl	%ecx, REGOFF_ECX(%esp)
	movl	8(%ebx), %ecx
	movl	%ecx, REGOFF_EBX(%esp)
	movl	12(%ebx), %ecx
	movl	%ecx, REGOFF_EAX(%esp)
	movl	16(%ebx), %ecx
	movl	%ecx, REGOFF_EBP(%esp)
	/*
	 * Recorded %esp is the address of the return-%eip slot.  This addl
	 * is deliberately the last flag-setting instruction ahead of the
	 * pushfl below, which captures the flags as they stand there.
	 */
	movl	%ebx, %ecx
	addl	$20, %ecx
	movl	%ecx, REGOFF_ESP(%esp)
	movl	$0, REGOFF_TRAPNO(%esp)
	movl	$0, REGOFF_ERR(%esp)
	lea	vpanic, %ecx			/ recorded %eip = vpanic itself
	movl	%ecx, REGOFF_EIP(%esp)
#if !defined(__GNUC_AS__)
	movw	%cs, %edx
#else	/* __GNUC_AS__ */
	mov	%cs, %edx
#endif	/* __GNUC_AS__ */
	movl	%edx, REGOFF_CS(%esp)
	pushfl
	popl	%ecx				/ %ecx = current flags
#if defined(__xpv)
	/*
	 * Synthesize the PS_IE bit from the event mask bit
	 */
	CURTHREAD(%edx)
	KPREEMPT_DISABLE(%edx)
	EVENT_MASK_TO_IE(%edx, %ecx)
	CURTHREAD(%edx)
	KPREEMPT_ENABLE_NOKP(%edx)
#endif
	movl	%ecx, REGOFF_EFL(%esp)
	movl	$0, REGOFF_UESP(%esp)
#if !defined(__GNUC_AS__)
	movw	%ss, %edx
#else	/* __GNUC_AS__ */
	mov	%ss, %edx
#endif	/* __GNUC_AS__ */
	movl	%edx, REGOFF_SS(%esp)

	/*
	 * panicsys(format, alist, rp, on_panic_stack); arguments are
	 * pushed right to left.  format and alist are read from the
	 * original frame through %ebp.
	 */
	movl	%esp, %ecx			/ %ecx = address of struct regs
	pushl	%eax				/ on_panic_stack
	pushl	%ecx				/ rp
	movl	12(%ebp), %ecx
	pushl	%ecx				/ alist
	movl	8(%ebp), %ecx
	pushl	%ecx				/ format
	call	panicsys
	addl	$16, %esp			/ drop the four arguments

	addl	$REGSIZE, %esp			/ release struct regs
	popl	%edx
	popl	%ecx
	popl	%ebx
	popl	%eax
	leave
	ret
	SET_SIZE(vpanic)
3387
/*
 * void dtrace_vpanic(const char *format, va_list alist)	(i386)
 *
 * Builds the same register save area as vpanic(), calls
 * dtrace_panic_trigger on panic_quiesce and then joins vpanic() at
 * vpanic_common with the trigger result in %eax and the save-area base
 * in %ebx.
 *
 * Save area, addressed through %ebx:
 *	20 return %eip	16 caller %ebp	12 %eax	8 %ebx	4 %ecx	0 %edx
 */
	ENTRY_NP(dtrace_vpanic)

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%eax
	pushl	%ebx
	pushl	%ecx
	pushl	%edx

	movl	%esp, %ebx			/ %ebx = base of save area

	lea	panic_quiesce, %eax
	pushl	%eax				/ argument: address of trigger
	call	dtrace_panic_trigger		/ %eax = trigger result
	addl	$4, %esp			/ drop the argument
	jmp	vpanic_common			/ continue in vpanic()

	SET_SIZE(dtrace_vpanic)
3406
3407#endif	/* __i386 */
3408#endif	/* __lint */
3409
3410#if defined(__lint)
3411
3412void
3413hres_tick(void)
3414{}
3415
3416int64_t timedelta;
3417hrtime_t hres_last_tick;
3418volatile timestruc_t hrestime;
3419int64_t hrestime_adj;
3420volatile int hres_lock;
3421hrtime_t hrtime_base;
3422
3423#else	/* __lint */
3424
/*
 * Clock variables read and updated by hres_tick().  The DGDEF3 arguments
 * are taken to be (name, size, alignment) as in sys/asm_linkage.h.
 */

	/* hrestime: two CLONGSIZE words; hres_tick treats them as sec, nsec */
	DGDEF3(hrestime, _MUL(2, CLONGSIZE), 8)
	.NWORD	0, 0

	/* hrestime_adj: 8 bytes, initially zero */
	DGDEF3(hrestime_adj, 8, 8)
	.long	0, 0

	/* hres_last_tick: 8 bytes, the hrtime value seen by the last tick */
	DGDEF3(hres_last_tick, 8, 8)
	.long	0, 0

	/* timedelta: 8 bytes, initially zero */
	DGDEF3(timedelta, 8, 8)
	.long	0, 0

	/* hres_lock: 4 bytes; hres_tick takes the lock through its low byte */
	DGDEF3(hres_lock, 4, 8)
	.long	0

	/*
	 * initialized to a non zero value to make pc_gethrtime()
	 * work correctly even before clock is initialized
	 * (low word = 6 * NSEC_PER_CLOCK_TICK, high word = 0)
	 */
	DGDEF3(hrtime_base, 8, 8)
	.long	_MUL(NSEC_PER_CLOCK_TICK, 6), 0

	/* adj_shift: 4 bytes holding the ADJ_SHIFT constant */
	DGDEF3(adj_shift, 4, 4)
	.long	ADJ_SHIFT
3449
3450#if defined(__amd64)
3451
/*
 * void hres_tick(void)					(amd64)
 *
 * Reads the current hrtime through *gethrtimef, takes hres_lock, adds the
 * interval since hres_last_tick to hrtime_base and to the nsec word of
 * hrestime, records the new hres_last_tick, calls __adj_hrestime and
 * releases the lock.
 *
 * Registers: %r8 = current hrtime, then the interval; %r11 = copy of the
 * current hrtime; %rax = address of hres_lock, then of hres_last_tick.
 */
	ENTRY_NP(hres_tick)
	pushq	%rbp
	movq	%rsp, %rbp

	/*
	 * *gethrtimef has to be called before CLOCK_LOCK is picked up
	 * (hres_last_tick may only be modified while holding CLOCK_LOCK).
	 * At worst, sampling the time here rather than under the lock may
	 * introduce some jitter in pc_gethrestime().
	 */
	call	*gethrtimef(%rip)
	movq	%rax, %r8		/* %r8 = current hrtime */

	/*
	 * Acquire the lock byte: swap in 0xff and retry until the value
	 * swapped out is 0.  Between attempts, spin reading the byte until
	 * it is seen as 0.
	 */
	leaq	hres_lock(%rip), %rax
	movb	$-1, %dl
1:
	xchgb	%dl, (%rax)
	testb	%dl, %dl
	jz	3f			/* old value was 0: lock is ours */
2:
	cmpb	$0, (%rax)		/* still held? */
	pause				/* pause leaves the flags intact */
	jne	2b
	jmp	1b			/* looks free, try the swap again */
3:
	/*
	 * Compute the interval since the previous call and fold it into
	 * hrtime_base (8 bytes, in nsec) and into hrestime (sec, nsec).
	 */
	leaq	hres_last_tick(%rip), %rax
	movq	%r8, %r11		/* keep the raw reading */
	subq	(%rax), %r8		/* %r8 = now - hres_last_tick */
	addq	%r8, hrtime_base(%rip)
	addq	%r8, hrestime+8(%rip)	/* hrestime.tv_nsec += interval */

	/* CLOCK_LOCK is held, so hres_last_tick can be updated */
	movq	%r11, (%rax)

	call	__adj_hrestime

	/*
	 * Release hres_lock: the low byte holds 0xff from the xchgb, so
	 * adding one to the word clears that byte and carries into the
	 * bytes above it.
	 */
	incl	hres_lock(%rip)
	leave
	ret
	SET_SIZE(hres_tick)
3502
3503#elif defined(__i386)
3504
/*
 * void hres_tick(void)					(i386)
 *
 * Reads the current 64-bit hrtime through *gethrtimef, takes hres_lock,
 * adds the interval since hres_last_tick to hrtime_base and to the nsec
 * word of hrestime, records the new hres_last_tick, then falls through
 * into __adj_hrestime, which applies the pending hrestime_adj (clamped to
 * +/- max_hres_adj), normalizes tv_nsec into whole seconds and releases
 * the lock.
 *
 * Registers: %esi:%ebx = current hrtime (saved at entry, restored before
 * the return); %ecx:%edx = interval, later %edx:%ecx = adj.
 */
3505	ENTRY_NP(hres_tick)
3506	pushl	%ebp
3507	movl	%esp, %ebp
3508	pushl	%esi			/ saved: used for the high word of hrtime
3509	pushl	%ebx			/ saved: used for the low word of hrtime
3510
3511	/*
3512	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3513	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3514	 * At worst, performing this now instead of under CLOCK_LOCK may
3515	 * introduce some jitter in pc_gethrestime().
3516	 */
3517	call	*gethrtimef
3518	movl	%eax, %ebx		/ %ebx = low word of current hrtime
3519	movl	%edx, %esi		/ %esi = high word of current hrtime
3520
	/*
	 * Acquire the lock byte: swap in 0xff and retry until the value
	 * swapped out is 0.  Between attempts, spin reading the byte until
	 * it is seen as 0.
	 */
3521	movl	$hres_lock, %eax
3522	movl	$-1, %edx
3523.CL1:
3524	xchgb	%dl, (%eax)
3525	testb	%dl, %dl
3526	jz	.CL3			/ got it
3527.CL2:
3528	cmpb	$0, (%eax)		/ possible to get lock?
3529	pause				/ pause leaves the flags intact
3530	jne	.CL2
3531	jmp	.CL1			/ yes, try again
3532.CL3:
3533	/*
3534	 * compute the interval since last time hres_tick was called
3535	 * and adjust hrtime_base and hrestime accordingly
3536	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3537	 * timestruc_t (sec, nsec)
3538	 */
3539
3540	lea	hres_last_tick, %eax
3541
3542	movl	%ebx, %edx
3543	movl	%esi, %ecx
3544
3545	subl 	(%eax), %edx		/ %ecx:%edx = now - hres_last_tick
3546	sbbl 	4(%eax), %ecx		/ (borrow carried into the high word)
3547
3548	addl	%edx, hrtime_base	/ add interval to hrtime_base
3549	adcl	%ecx, hrtime_base+4
3550
3551	addl 	%edx, hrestime+4	/ add interval to hrestime.tv_nsec
3552
3553	/
3554	/ Now that we have CLOCK_LOCK, we can update hres_last_tick.
3555	/
3556	movl	%ebx, (%eax)
3557	movl	%esi,  4(%eax)
3558
3559	/ get hrestime at this moment. used as base for pc_gethrestime
3560	/
3561	/ Apply adjustment, if any
3562	/
3563	/ #define HRES_ADJ	(NSEC_PER_CLOCK_TICK >> ADJ_SHIFT)
3564	/ (max_hres_adj)
3565	/
3566	/ void
3567	/ adj_hrestime()
3568	/ {
3569	/	long long adj;
3570	/
3571	/	if (hrestime_adj == 0)
3572	/		adj = 0;
3573	/	else if (hrestime_adj > 0) {
3574	/		if (hrestime_adj < HRES_ADJ)
3575	/			adj = hrestime_adj;
3576	/		else
3577	/			adj = HRES_ADJ;
3578	/	}
3579	/	else {
3580	/		if (hrestime_adj < -(HRES_ADJ))
3581	/			adj = -(HRES_ADJ);
3582	/		else
3583	/			adj = hrestime_adj;
3584	/	}
3585	/
3586	/	timedelta -= adj;
3587	/	hrestime_adj = timedelta;
3588	/	hrestime.tv_nsec += adj;
3589	/
3590	/	while (hrestime.tv_nsec >= NANOSEC) {
3591	/		one_sec++;
3592	/		hrestime.tv_sec++;
3593	/		hrestime.tv_nsec -= NANOSEC;
3594	/	}
3595	/ }
	/
	/ The code above falls straight through into this label.  From here
	/ on %edx:%esi holds hrestime_adj and %edx:%ecx ends up holding adj.
	/
3596__adj_hrestime:
3597	movl	hrestime_adj, %esi	/ if (hrestime_adj == 0)
3598	movl	hrestime_adj+4, %edx
3599	andl	%esi, %esi
3600	jne	.CL4			/ no
3601	andl	%edx, %edx
3602	jne	.CL4			/ no
3603	subl	%ecx, %ecx		/ yes, adj = 0;
3604	subl	%edx, %edx
3605	jmp	.CL5
3606.CL4:
	/ %eax:%ecx = 0 - hrestime_adj; its sign decides the branch below
3607	subl	%ecx, %ecx
3608	subl	%eax, %eax
3609	subl	%esi, %ecx
3610	sbbl	%edx, %eax
3611	andl	%eax, %eax		/ if (hrestime_adj > 0)
3612	jge	.CL6
3613
3614	/ In the following comments, HRES_ADJ is used, while in the code
3615	/ max_hres_adj is used.
3616	/
3617	/ The test for "hrestime_adj < HRES_ADJ" is complicated because
3618	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3619	/ on the logical equivalence of:
3620	/
3621	/	!(hrestime_adj < HRES_ADJ)
3622	/
3623	/ and the two step sequence:
3624	/
3625	/	(HRES_ADJ - lsw(hrestime_adj)) generates a Borrow/Carry
3626	/
3627	/ which computes whether or not the least significant 32-bits
3628	/ of hrestime_adj is greater than HRES_ADJ, followed by:
3629	/
3630	/	Previous Borrow/Carry + -1 + msw(hrestime_adj) generates a Carry
3631	/
3632	/ which generates a carry whenever step 1 is true or the most
3633	/ significant long of the longlong hrestime_adj is non-zero.
3634
3635	movl	max_hres_adj, %ecx	/ hrestime_adj is positive
3636	subl	%esi, %ecx
3637	movl	%edx, %eax
3638	adcl	$-1, %eax
3639	jnc	.CL7
3640	movl	max_hres_adj, %ecx	/ adj = HRES_ADJ;
3641	subl	%edx, %edx
3642	jmp	.CL5
3643
3644	/ The following computation is similar to the one above.
3645	/
3646	/ The test for "hrestime_adj < -(HRES_ADJ)" is complicated because
3647	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3648	/ on the logical equivalence of:
3649	/
3650	/	(hrestime_adj > -HRES_ADJ)
3651	/
3652	/ and the two step sequence:
3653	/
3654	/	(HRES_ADJ + lsw(hrestime_adj)) generates a Carry
3655	/
3656	/ which means the least significant 32-bits of hrestime_adj is
3657	/ greater than -HRES_ADJ, followed by:
3658	/
3659	/	Previous Carry + 0 + msw(hrestime_adj) generates a Carry
3660	/
3661	/ which generates a carry only when step 1 is true and the most
3662	/ significant long of the longlong hrestime_adj is -1.
3663
3664.CL6:					/ hrestime_adj is negative
3665	movl	%esi, %ecx
3666	addl	max_hres_adj, %ecx
3667	movl	%edx, %eax
3668	adcl	$0, %eax
3669	jc	.CL7
3670	xor	%ecx, %ecx
3671	subl	max_hres_adj, %ecx	/ adj = -(HRES_ADJ);
3672	movl	$-1, %edx
3673	jmp	.CL5
3674.CL7:
3675	movl	%esi, %ecx		/ adj = hrestime_adj;
3676.CL5:
	/ here %edx:%ecx = adj (high:low)
3677	movl	timedelta, %esi
3678	subl	%ecx, %esi
3679	movl	timedelta+4, %eax
3680	sbbl	%edx, %eax
3681	movl	%esi, timedelta
3682	movl	%eax, timedelta+4	/ timedelta -= adj;
3683	movl	%esi, hrestime_adj
3684	movl	%eax, hrestime_adj+4	/ hrestime_adj = timedelta;
3685	addl	hrestime+4, %ecx	/ %ecx = hrestime.tv_nsec + low word of adj
3686
3687	movl	%ecx, %eax		/ eax = tv_nsec
36881:
3689	cmpl	$NANOSEC, %eax		/ if ((unsigned long)tv_nsec >= NANOSEC)
3690	jb	.CL8			/ no
3691	incl	one_sec			/ yes,  one_sec++;
3692	incl	hrestime		/ hrestime.tv_sec++;
3693	addl	$-NANOSEC, %eax		/ tv_nsec -= NANOSEC
3694	jmp	1b			/ check for more seconds
3695
3696.CL8:
3697	movl	%eax, hrestime+4	/ store final into hrestime.tv_nsec
	/ The low byte of hres_lock holds 0xff from the xchgb above, so
	/ adding one to the word clears that byte and carries into the
	/ bytes above it.
3698	incl	hres_lock		/ release the hres_lock
3699
3700	popl	%ebx
3701	popl	%esi
3702	leave
3703	ret
3704	SET_SIZE(hres_tick)
3705
3706#endif	/* __i386 */
3707#endif	/* __lint */
3708
3709/*
3710 * void prefetch_smap_w(void *)
3711 *
3712 * Prefetch ahead within a linear list of smap structures.
3713 * Not implemented for ia32.  Stub for compatibility.
3714 */
3715
3716#if defined(__lint)
3717
3718/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{
	/* lint-only stub; the assembly version is itself a bare return */
}
3721
3722#else	/* __lint */
3723
/*
 * void prefetch_smap_w(void *)
 *
 * Compatibility stub: ignores its argument and returns immediately.
 */
	ENTRY(prefetch_smap_w)
	/*
	 * Two byte return (rep prefix + ret) for use as a branch target;
	 * see AMD Software Optimization Guide - Section 6.2.
	 */
	rep
	ret
	SET_SIZE(prefetch_smap_w)
3728
3729#endif	/* __lint */
3730
3731/*
3732 * prefetch_page_r(page_t *)
3733 * issue prefetch instructions for a page_t
3734 */
3735#if defined(__lint)
3736
3737/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{
	/* lint-only stub; the assembly version is itself a bare return */
}
3741
3742#else	/* __lint */
3743
/*
 * void prefetch_page_r(void *pp)
 *
 * As written here this issues no prefetch instruction: it ignores its
 * argument and returns immediately.
 */
	ENTRY(prefetch_page_r)
	/*
	 * Two byte return (rep prefix + ret) for use as a branch target;
	 * see AMD Software Optimization Guide - Section 6.2.
	 */
	rep
	ret
	SET_SIZE(prefetch_page_r)
3748
3749#endif	/* __lint */
3750
3751#if defined(__lint)
3752
3753/*ARGSUSED*/
int
bcmp(const void *s1, const void *s2, size_t count)
{
	/* lint-only stub; the real routine is the assembly version below */
	return (0);
}
3757
3758#else   /* __lint */
3759
3760#if defined(__amd64)
3761
/*
 * int bcmp(const void *s1, const void *s2, size_t count)	(amd64)
 *
 * Forwards its three arguments unchanged to memcmp() and reduces the
 * result to 0 (memcmp returned 0) or 1 (anything else).
 *
 * With DEBUG defined, panics when s1 is below postbootkernelbase or,
 * failing that, when s2 is below it.
 */
	ENTRY(bcmp)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	movq	postbootkernelbase(%rip), %r11
	cmpq	%r11, %rdi
	jb	0f			/* s1 below the limit: panic */
	cmpq	%r11, %rsi
	jae	1f			/* s2 is fine as well: carry on */
0:	leaq	.bcmp_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
1:
#endif	/* DEBUG */
	call	memcmp			/* %rdi, %rsi, %rdx pass straight through */
	testl	%eax, %eax
	setne	%dl			/* %dl = (memcmp result != 0) */
	movzbl	%dl, %eax		/* widen to the int return value */
	leave
	ret
	SET_SIZE(bcmp)
3783
3784#elif defined(__i386)
3785
/*
 * int bcmp(const void *s1, const void *s2, size_t count)	(i386)
 *
 * Returns 0 when both pointers are equal or when the two buffers hold
 * the same count bytes, 1 otherwise.  Buffers are compared four bytes at
 * a time while at least four bytes remain, then byte by byte; a word
 * mismatch is re-examined by the byte loop.
 *
 * With DEBUG defined, panics when s1 is below postbootkernelbase or,
 * failing that, when s2 is below it.
 *
 * Registers: %eax = cursor in s1, %ecx = cursor in s2, %edi = bytes left
 * (saved and restored), %edx = scratch.
 */
#define	ARG_S1		8		/* frame offsets of the arguments */
#define	ARG_S2		12
#define	ARG_LENGTH	16

	ENTRY(bcmp)
	pushl	%ebp
	movl	%esp, %ebp		/ set up the frame
#ifdef DEBUG
	movl    postbootkernelbase, %eax
	cmpl    %eax, ARG_S1(%ebp)
	jb	0f			/ s1 below the limit: panic
	cmpl    %eax, ARG_S2(%ebp)
	jnb	1f			/ s2 is fine as well: carry on
0:	pushl   $.bcmp_panic_msg
	call    panic
1:
#endif	/* DEBUG */

	pushl	%edi			/ %edi is restored before returning
	movl	ARG_S1(%ebp), %eax	/ %eax = s1
	movl	ARG_S2(%ebp), %ecx	/ %ecx = s2
	cmpl	%eax, %ecx
	je	.equal			/ identical pointers compare equal
	movl	ARG_LENGTH(%ebp), %edi	/ %edi = count
	cmpl	$4, %edi
	jb	.byte_check		/ fewer than 4 bytes in total
	.align	4
.word_loop:
	movl	(%ecx), %edx		/ next 4 bytes of s2
	leal	-4(%edi), %edi		/ bytes left -= 4
	cmpl	(%eax), %edx		/ against next 4 bytes of s1
	jne	.word_not_equal
	leal	4(%ecx), %ecx		/ advance both cursors
	leal	4(%eax), %eax
	cmpl	$4, %edi
	jae	.word_loop		/ another full word remains
.byte_check:
	testl	%edi, %edi
	jz	.equal			/ nothing left to compare
	jmp	.byte_loop
.word_not_equal:
	leal	4(%edi), %edi		/ undo the decrement for this word
	.align	4
.byte_loop:
	movb	(%ecx),	%dl		/ next byte of s2
	cmpb	%dl, (%eax)		/ against next byte of s1
	jne	.not_equal
	incl	%ecx
	incl	%eax
	decl	%edi
	jnz	.byte_loop		/ more bytes left
.equal:
	xorl	%eax, %eax		/ return 0
	popl	%edi
	leave
	ret
	.align	4
.not_equal:
	movl	$1, %eax		/ return 1
	popl	%edi
	leave
	ret
	SET_SIZE(bcmp)
3849
3850#endif	/* __i386 */
3851
3852#ifdef DEBUG
/* Message handed to panic() by the DEBUG argument checks in bcmp(). */
	.text
.bcmp_panic_msg:
	.string "bcmp: arguments below kernelbase"
3856#endif	/* DEBUG */
3857
3858#endif	/* __lint */
3859
3860#if defined(__lint)
3861
3862uint_t
3863bsrw_insn(uint16_t mask)
3864{
3865	uint_t index = sizeof (mask) * NBBY - 1;
3866
3867	while ((mask & (1 << index)) == 0)
3868		index--;
3869	return (index);
3870}
3871
3872#else	/* __lint */
3873
3874#if defined(__amd64)
3875
/*
 * uint_t bsrw_insn(uint16_t mask)				(amd64)
 *
 * Returns the bit index produced by a 16 bit bsr over mask (%di).
 * %eax is cleared first so that the upper bits of the result are zero.
 */
	ENTRY_NP(bsrw_insn)
	xorl	%eax, %eax		/* bsrw below writes only %ax */
	bsrw	%di, %ax		/* %ax = index of highest set bit */
	ret
	SET_SIZE(bsrw_insn)
3881
3882#elif defined(__i386)
3883
/*
 * uint_t bsrw_insn(uint16_t mask)				(i386)
 *
 * Returns the bit index produced by a 16 bit bsr over mask, which is
 * read from 4(%esp).  %eax is cleared first so that the upper bits of
 * the result are zero.
 */
	ENTRY_NP(bsrw_insn)
	xorl	%eax, %eax		/ bsrw below writes only %ax
	movw	4(%esp), %cx		/ %cx = mask
	bsrw	%cx, %ax		/ %ax = index of highest set bit
	ret
	SET_SIZE(bsrw_insn)
3890
3891#endif	/* __i386 */
3892#endif	/* __lint */
3893
3894#if defined(__lint)
3895
3896uint_t
3897atomic_btr32(uint32_t *pending, uint_t pil)
3898{
3899	return (*pending &= ~(1 << pil));
3900}
3901
3902#else	/* __lint */
3903
3904#if defined(__i386)
3905
/*
 * uint_t atomic_btr32(uint32_t *pending, uint_t pil)		(i386)
 *
 * Atomically clears bit pil of *pending and returns 1 if that bit was
 * set beforehand, 0 if it was not.
 */
	ENTRY_NP(atomic_btr32)
	movl	8(%esp), %edx		/ %edx = pil (bit number)
	movl	4(%esp), %ecx		/ %ecx = pending
	xorl	%eax, %eax		/ must precede btrl: xorl clears CF
	lock
	btrl	%edx, (%ecx)		/ CF = old bit, bit is now clear
	setc	%al			/ %eax = old bit
	ret
	SET_SIZE(atomic_btr32)
3915
3916#endif	/* __i386 */
3917#endif	/* __lint */
3918
3919#if defined(__lint)
3920
3921/*ARGSUSED*/
3922void
3923switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t), uint_t arg1,
3924	    uint_t arg2)
3925{}
3926
3927#else	/* __lint */
3928
3929#if defined(__amd64)
3930
/*
 * void switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t),
 *     uint_t arg1, uint_t arg2)				(amd64)
 *
 * Calls func(arg1, arg2) with the stack pointer set to newsp.  The old
 * stack pointer is kept in %rbp and restored by leave afterwards.
 *
 * In:	%rdi = newsp, %rsi = func, %rdx = arg1, %rcx = arg2
 */
	ENTRY_NP(switch_sp_and_call)
	pushq	%rbp
	movq	%rsp, %rbp		/* frame records the old stack */
	movq	%rsi, %r11		/* %r11 = func */
	movq	%rdi, %rsp		/* run on newsp from here */
	movq	%rdx, %rdi		/* first argument for func */
	movq	%rcx, %rsi		/* second argument for func */
	call	*%r11
	leave				/* back to the old stack */
	ret
	SET_SIZE(switch_sp_and_call)
3942
3943#elif defined(__i386)
3944
/*
 * void switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t),
 *     uint_t arg1, uint_t arg2)				(i386)
 *
 * Calls func(arg1, arg2) with the stack pointer set to newsp.  All four
 * arguments are read through %ebp, which still points into the old
 * stack; leave restores that stack afterwards.
 */
	ENTRY_NP(switch_sp_and_call)
	pushl	%ebp
	mov	%esp, %ebp		/* frame records the old stack */
	movl	8(%ebp), %esp		/* run on newsp from here */
	pushl	20(%ebp)		/* arg2, pushed first */
	pushl	16(%ebp)		/* arg1 */
	call	*12(%ebp)		/* func(arg1, arg2) */
	addl	$8, %esp		/* drop the two arguments */
	leave				/* back to the old stack */
	ret
	SET_SIZE(switch_sp_and_call)
3956
3957#endif	/* __i386 */
3958#endif	/* __lint */
3959
3960#if defined(__lint)
3961
void
kmdb_enter(void)
{
	/* lint-only stub; the real routine is the assembly version below */
}
3965
3966#else	/* __lint */
3967
3968#if defined(__amd64)
3969
/*
 * void kmdb_enter(void)					(amd64)
 *
 * Calls intr_clear(), raises software interrupt T_DBGENTR, then hands
 * the value intr_clear() returned to intr_restore().
 */
	ENTRY_NP(kmdb_enter)
	pushq	%rbp
	movq	%rsp, %rbp

	/*
	 * intr_clear saves the flags, does a 'cli' and returns the saved
	 * flags in %rax.
	 */
	call	intr_clear

	int	$T_DBGENTR

	/*
	 * Put the saved flags back.
	 * NOTE(review): this relies on %rax still holding the intr_clear
	 * result after the int instruction returns - confirm against the
	 * T_DBGENTR handler.
	 */
	movq	%rax, %rdi
	call	intr_restore

	leave
	ret
	SET_SIZE(kmdb_enter)
3990
3991#elif defined(__i386)
3992
/*
 * void kmdb_enter(void)					(i386)
 *
 * Calls intr_clear(), raises software interrupt T_DBGENTR, then hands
 * the value intr_clear() returned to intr_restore().
 */
	ENTRY_NP(kmdb_enter)
	pushl	%ebp
	movl	%esp, %ebp

	/*
	 * intr_clear saves the flags, does a 'cli' and returns the saved
	 * flags in %eax.
	 */
	call	intr_clear

	int	$T_DBGENTR

	/*
	 * Put the saved flags back.
	 * NOTE(review): this relies on %eax still holding the intr_clear
	 * result after the int instruction returns - confirm against the
	 * T_DBGENTR handler.
	 */
	pushl	%eax
	call	intr_restore
	addl	$4, %esp		/* drop the argument */

	leave
	ret
	SET_SIZE(kmdb_enter)
4014
4015#endif	/* __i386 */
4016#endif	/* __lint */
4017
4018#if defined(__lint)
4019
void
return_instr(void)
{
	/* lint-only stub; the assembly version is itself a bare return */
}
4023
4024#else	/* __lint */
4025
/*
 * void return_instr(void)
 *
 * Does nothing except return to its caller.
 */
	ENTRY_NP(return_instr)
	/*
	 * Two byte return (rep prefix + ret) for use as a branch target;
	 * see AMD Software Optimization Guide - Section 6.2.
	 */
	rep
	ret
	SET_SIZE(return_instr)
4030
4031#endif	/* __lint */
4032
4033#if defined(__lint)
4034
4035ulong_t
4036getflags(void)
4037{
4038	return (0);
4039}
4040
4041#else	/* __lint */
4042
4043#if defined(__amd64)
4044
/*
 * ulong_t getflags(void)					(amd64)
 *
 * Returns the current contents of the flags register.  Under __xpv the
 * PS_IE bit of the returned value is replaced by one derived from the
 * upcall mask test below.
 */
	ENTRY(getflags)
	pushfq
	popq	%rax			/* %rax = flags */
#if defined(__xpv)
	CURTHREAD(%rdi)
	KPREEMPT_DISABLE(%rdi)
	/*
	 * Synthesize the PS_IE bit from the event mask bit: clear it,
	 * then set it again unless XEN_TEST_UPCALL_MASK leaves ZF clear.
	 */
	CURVCPU(%r11)
	andq    $_BITNOT(PS_IE), %rax
	XEN_TEST_UPCALL_MASK(%r11)
	jnz	1f
	orq	$PS_IE, %rax
1:
	KPREEMPT_ENABLE_NOKP(%rdi)
#endif
	ret
	SET_SIZE(getflags)
4064
4065#elif defined(__i386)
4066
/*
 * ulong_t getflags(void)					(i386)
 *
 * Returns the current contents of the flags register.  Under __xpv the
 * PS_IE bit of the returned value is replaced by one derived from the
 * upcall mask test below.
 */
	ENTRY(getflags)
	pushfl
	popl	%eax			/* %eax = flags */
#if defined(__xpv)
	CURTHREAD(%ecx)
	KPREEMPT_DISABLE(%ecx)
	/*
	 * Synthesize the PS_IE bit from the event mask bit: clear it,
	 * then set it again unless XEN_TEST_UPCALL_MASK leaves ZF clear.
	 */
	CURVCPU(%edx)
	andl    $_BITNOT(PS_IE), %eax
	XEN_TEST_UPCALL_MASK(%edx)
	jnz	1f
	orl	$PS_IE, %eax
1:
	KPREEMPT_ENABLE_NOKP(%ecx)
#endif
	ret
	SET_SIZE(getflags)
4086
4087#endif	/* __i386 */
4088
4089#endif	/* __lint */
4090
4091#if defined(__lint)
4092
4093ftrace_icookie_t
4094ftrace_interrupt_disable(void)
4095{ return (0); }
4096
4097#else   /* __lint */
4098
4099#if defined(__amd64)
4100
/*
 * ftrace_icookie_t ftrace_interrupt_disable(void)		(amd64)
 *
 * Returns the flags register as it was on entry, then executes the CLI
 * macro with %rdx as its register argument.
 */
	ENTRY(ftrace_interrupt_disable)
	pushfq
	popq	%rax			/* %rax = flags before CLI (the cookie) */
	CLI(%rdx)
	ret
	SET_SIZE(ftrace_interrupt_disable)
4107
4108#elif defined(__i386)
4109
/*
 * ftrace_icookie_t ftrace_interrupt_disable(void)		(i386)
 *
 * Returns the flags register as it was on entry, then executes the CLI
 * macro with %edx as its register argument.
 */
	ENTRY(ftrace_interrupt_disable)
	pushfl
	popl	%eax			/* %eax = flags before CLI (the cookie) */
	CLI(%edx)
	ret
	SET_SIZE(ftrace_interrupt_disable)
4116
4117#endif	/* __i386 */
4118#endif	/* __lint */
4119
4120#if defined(__lint)
4121
4122/*ARGSUSED*/
4123void
4124ftrace_interrupt_enable(ftrace_icookie_t cookie)
4125{}
4126
4127#else	/* __lint */
4128
4129#if defined(__amd64)
4130
/*
 * void ftrace_interrupt_enable(ftrace_icookie_t cookie)	(amd64)
 *
 * Loads the flags register from cookie (%rdi) by pushing it and
 * popping it with popfq.
 */
	ENTRY(ftrace_interrupt_enable)
	pushq	%rdi			/* cookie = flags value to reinstate */
	popfq
	ret
	SET_SIZE(ftrace_interrupt_enable)
4136
4137#elif defined(__i386)
4138
/*
 * void ftrace_interrupt_enable(ftrace_icookie_t cookie)	(i386)
 *
 * Loads the flags register from cookie, read from 4(%esp), by pushing
 * it and popping it with popfl.
 */
	ENTRY(ftrace_interrupt_enable)
	movl	4(%esp), %eax		/* %eax = cookie */
	pushl	%eax
	popfl				/* flags = cookie */
	ret
	SET_SIZE(ftrace_interrupt_enable)
4145
4146#endif	/* __i386 */
4147#endif	/* __lint */
4148