xref: /titanic_52/usr/src/uts/intel/ia32/ml/i86_subr.s (revision 98157a7002f4f2cf7978f3084ca5577f0a1d72b2)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 *  Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
29 *  Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T
30 *    All Rights Reserved
31 */
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"
34
35/*
36 * General assembly language routines.
37 * It is the intent of this file to contain routines that are
38 * independent of the specific kernel architecture, and those that are
39 * common across kernel architectures.
40 * As architectures diverge, and implementations of specific
41 * architecture-dependent routines change, the routines should be moved
42 * from this file into the respective ../`arch -k`/subr.s file.
43 */
44
45#include <sys/asm_linkage.h>
46#include <sys/asm_misc.h>
47#include <sys/panic.h>
48#include <sys/ontrap.h>
49#include <sys/regset.h>
50#include <sys/privregs.h>
51#include <sys/reboot.h>
52#include <sys/psw.h>
53#include <sys/x86_archext.h>
54
55#if defined(__lint)
56#include <sys/types.h>
57#include <sys/systm.h>
58#include <sys/thread.h>
59#include <sys/archsystm.h>
60#include <sys/byteorder.h>
61#include <sys/dtrace.h>
62#include <sys/ftrace.h>
63#else	/* __lint */
64#include "assym.h"
65#endif	/* __lint */
66#include <sys/dditypes.h>
67
68/*
69 * on_fault()
70 * Catch lofault faults. Like setjmp except it returns one
71 * if code following causes uncorrectable fault. Turned off
72 * by calling no_fault().
 *
 * on_fault(ljb) stores ljb in curthread's t_onfault slot and the address
 * of catch_fault in its t_lofault slot, then jumps (rather than calls) to
 * setjmp, so setjmp records on_fault()'s own return address and returns 0
 * directly to on_fault()'s caller.  catch_fault fetches the saved buffer,
 * zeroes both slots and hands the buffer to longjmp, which makes the
 * original on_fault() call appear to return 1.  no_fault() only zeroes
 * both slots.
73 */
74
75#if defined(__lint)
76
77/* ARGSUSED */
78int
79on_fault(label_t *ljb)
80{ return (0); }
81
82void
83no_fault(void)
84{}
85
86#else	/* __lint */
87
88#if defined(__amd64)
89
90	ENTRY(on_fault)
91	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
92	leaq	catch_fault(%rip), %rdx		/* %rdx = &catch_fault */
93	movq	%rdi, T_ONFAULT(%rsi)		/* jumpbuf in t_onfault */
94	movq	%rdx, T_LOFAULT(%rsi)		/* catch_fault in t_lofault */
95	jmp	setjmp				/* let setjmp do the rest */
96
	/*
	 * Not a separate ENTRY: this label lives inside on_fault's extent
	 * and is only reached through the address stored in t_lofault.
	 */
97catch_fault:
98	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
99	movq	T_ONFAULT(%rsi), %rdi		/* address of save area */
100	xorl	%eax, %eax			/* %rax = 0 */
101	movq	%rax, T_ONFAULT(%rsi)		/* turn off onfault */
102	movq	%rax, T_LOFAULT(%rsi)		/* turn off lofault */
103	jmp	longjmp				/* let longjmp do the rest */
104	SET_SIZE(on_fault)
105
106	ENTRY(no_fault)
107	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
108	xorl	%eax, %eax			/* %rax = 0 */
109	movq	%rax, T_ONFAULT(%rsi)		/* turn off onfault */
110	movq	%rax, T_LOFAULT(%rsi)		/* turn off lofault */
111	ret
112	SET_SIZE(no_fault)
113
114#elif defined(__i386)
115
116	ENTRY(on_fault)
117	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
118	movl	4(%esp), %eax			/* jumpbuf address */
119	leal	catch_fault, %ecx		/* %ecx = &catch_fault */
120	movl	%eax, T_ONFAULT(%edx)		/* jumpbuf in t_onfault */
121	movl	%ecx, T_LOFAULT(%edx)		/* catch_fault in t_lofault */
122	jmp	setjmp				/* let setjmp do the rest */
123
	/*
	 * Not a separate ENTRY: only reached through the address stored in
	 * t_lofault.  longjmp is invoked with call (not jmp) so that the
	 * pushed buffer address sits at 4(%esp), where longjmp reads it.
	 */
124catch_fault:
125	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
126	xorl	%eax, %eax			/* %eax = 0 */
127	movl	T_ONFAULT(%edx), %ecx		/* address of save area */
128	movl	%eax, T_ONFAULT(%edx)		/* turn off onfault */
129	movl	%eax, T_LOFAULT(%edx)		/* turn off lofault */
130	pushl	%ecx				/* argument for longjmp */
131	call	longjmp				/* let longjmp do the rest */
132	SET_SIZE(on_fault)
133
134	ENTRY(no_fault)
135	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
136	xorl	%eax, %eax			/* %eax = 0 */
137	movl	%eax, T_ONFAULT(%edx)		/* turn off onfault */
138	movl	%eax, T_LOFAULT(%edx)		/* turn off lofault */
139	ret
140	SET_SIZE(no_fault)
141
142#endif	/* __i386 */
143#endif	/* __lint */
144
145/*
146 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  We just
147 * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called.
 *
 * Takes no arguments: the jump buffer is located through curthread's
 * t_ontrap pointer plus the OT_JMPBUF offset.  Control does not come back
 * here; longjmp resumes at the point recorded in that buffer.
148 */
149
150#if defined(__lint)
151
152void
153on_trap_trampoline(void)
154{}
155
156#else	/* __lint */
157
158#if defined(__amd64)
159
160	ENTRY(on_trap_trampoline)
161	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
162	movq	T_ONTRAP(%rsi), %rdi		/* %rdi = curthread->t_ontrap */
163	addq	$OT_JMPBUF, %rdi		/* %rdi = &t_ontrap->ot_jmpbuf */
164	jmp	longjmp				/* tail-jump; arg is in %rdi */
165	SET_SIZE(on_trap_trampoline)
166
167#elif defined(__i386)
168
169	ENTRY(on_trap_trampoline)
170	movl	%gs:CPU_THREAD, %eax		/* %eax = curthread */
171	movl	T_ONTRAP(%eax), %eax		/* %eax = curthread->t_ontrap */
172	addl	$OT_JMPBUF, %eax		/* %eax = &t_ontrap->ot_jmpbuf */
173	pushl	%eax				/* argument for longjmp */
174	call	longjmp				/* call so arg is at 4(%esp) */
175	SET_SIZE(on_trap_trampoline)
176
177#endif	/* __i386 */
178#endif	/* __lint */
179
180/*
181 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
182 * more information about the on_trap() mechanism.  If the on_trap_data is the
183 * same as the topmost stack element, we just modify that element.
 *
 * int on_trap(on_trap_data_t *otp, uint_t prot)
 *
 * Initializes *otp (ot_prot = prot, ot_trap = 0, ot_trampoline =
 * &on_trap_trampoline, ot_handle = ot_pad1 = NULL), links it in front of
 * curthread->t_ontrap unless it is already the top element, and then
 * jumps to setjmp on &otp->ot_jmpbuf.  Because setjmp is entered with a
 * jmp, its return value (0) goes straight back to on_trap()'s caller.
184 */
185#if defined(__lint)
186
187/*ARGSUSED*/
188int
189on_trap(on_trap_data_t *otp, uint_t prot)
190{ return (0); }
191
192#else	/* __lint */
193
194#if defined(__amd64)
195
196	ENTRY(on_trap)
197	movw	%si, OT_PROT(%rdi)		/* ot_prot = prot */
198	movw	$0, OT_TRAP(%rdi)		/* ot_trap = 0 */
199	leaq	on_trap_trampoline(%rip), %rdx	/* rdx = &on_trap_trampoline */
200	movq	%rdx, OT_TRAMPOLINE(%rdi)	/* ot_trampoline = rdx */
201	xorl	%ecx, %ecx			/* rcx = 0 */
202	movq	%rcx, OT_HANDLE(%rdi)		/* ot_handle = NULL */
203	movq	%rcx, OT_PAD1(%rdi)		/* ot_pad1 = NULL */
204	movq	%gs:CPU_THREAD, %rdx		/* rdx = curthread */
205	movq	T_ONTRAP(%rdx), %rcx		/* rcx = curthread->t_ontrap */
206	cmpq	%rdi, %rcx			/* if (otp == %rcx)	*/
207	je	0f				/*	don't modify t_ontrap */
208
209	movq	%rcx, OT_PREV(%rdi)		/* ot_prev = t_ontrap */
210	movq	%rdi, T_ONTRAP(%rdx)		/* curthread->t_ontrap = otp */
211
2120:	addq	$OT_JMPBUF, %rdi		/* &ot_jmpbuf */
213	jmp	setjmp				/* let setjmp do the rest */
214	SET_SIZE(on_trap)
215
216#elif defined(__i386)
217
218	ENTRY(on_trap)
219	movl	4(%esp), %eax			/* %eax = otp */
220	movl	8(%esp), %edx			/* %edx = prot */
221
222	movw	%dx, OT_PROT(%eax)		/* ot_prot = prot */
223	movw	$0, OT_TRAP(%eax)		/* ot_trap = 0 */
224	leal	on_trap_trampoline, %edx	/* %edx = &on_trap_trampoline */
225	movl	%edx, OT_TRAMPOLINE(%eax)	/* ot_trampoline = %edx */
226	movl	$0, OT_HANDLE(%eax)		/* ot_handle = NULL */
227	movl	$0, OT_PAD1(%eax)		/* ot_pad1 = NULL */
228	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
229	movl	T_ONTRAP(%edx), %ecx		/* %ecx = curthread->t_ontrap */
230	cmpl	%eax, %ecx			/* if (otp == %ecx) */
231	je	0f				/*    don't modify t_ontrap */
232
233	movl	%ecx, OT_PREV(%eax)		/* ot_prev = t_ontrap */
234	movl	%eax, T_ONTRAP(%edx)		/* curthread->t_ontrap = otp */
235
	/*
	 * setjmp reads its argument from 4(%esp), so the caller's first
	 * argument slot is overwritten with &otp->ot_jmpbuf before the jmp.
	 */
2360:	addl	$OT_JMPBUF, %eax		/* %eax = &ot_jmpbuf */
237	movl	%eax, 4(%esp)			/* put %eax back on the stack */
238	jmp	setjmp				/* let setjmp do the rest */
239	SET_SIZE(on_trap)
240
241#endif	/* __i386 */
242#endif	/* __lint */
243
244/*
245 * Setjmp and longjmp implement non-local gotos using state vectors
246 * type label_t.
 *
 * setjmp(lp) stores the stack pointer, the registers listed below and its
 * own return address into *lp and returns 0.  longjmp(lp) reloads them and
 * resumes at the saved return address with a return value of 1, so the
 * earlier setjmp() call appears to return a second time.  The return
 * address lives at offset 0 of the label_t; the #error below enforces
 * that LABEL_PC is 0, which the (%rdi) / (%edx) accesses rely on.
247 */
248
249#if defined(__lint)
250
251/* ARGSUSED */
252int
253setjmp(label_t *lp)
254{ return (0); }
255
256/* ARGSUSED */
257void
258longjmp(label_t *lp)
259{}
260
261#else	/* __lint */
262
263#if LABEL_PC != 0
264#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
265#endif	/* LABEL_PC != 0 */
266
267#if defined(__amd64)
268
269	ENTRY(setjmp)
270	movq	%rsp, LABEL_SP(%rdi)	/* %rsp still points at the return pc */
271	movq	%rbp, LABEL_RBP(%rdi)
272	movq	%rbx, LABEL_RBX(%rdi)
273	movq	%r12, LABEL_R12(%rdi)
274	movq	%r13, LABEL_R13(%rdi)
275	movq	%r14, LABEL_R14(%rdi)
276	movq	%r15, LABEL_R15(%rdi)
277	movq	(%rsp), %rdx		/* return address */
278	movq	%rdx, (%rdi)		/* LABEL_PC is 0 */
279	xorl	%eax, %eax		/* return 0 */
280	ret
281	SET_SIZE(setjmp)
282
283	ENTRY(longjmp)
284	movq	LABEL_SP(%rdi), %rsp	/* switch to the saved stack pointer */
285	movq	LABEL_RBP(%rdi), %rbp
286	movq	LABEL_RBX(%rdi), %rbx
287	movq	LABEL_R12(%rdi), %r12
288	movq	LABEL_R13(%rdi), %r13
289	movq	LABEL_R14(%rdi), %r14
290	movq	LABEL_R15(%rdi), %r15
291	movq	(%rdi), %rdx		/* return address; LABEL_PC is 0 */
292	movq	%rdx, (%rsp)		/* rewrite return slot so ret goes there */
293	xorl	%eax, %eax
294	incl	%eax			/* return 1 */
295	ret
296	SET_SIZE(longjmp)
297
298#elif defined(__i386)
299
300	ENTRY(setjmp)
301	movl	4(%esp), %edx		/* address of save area */
302	movl	%ebp, LABEL_EBP(%edx)
303	movl	%ebx, LABEL_EBX(%edx)
304	movl	%esi, LABEL_ESI(%edx)
305	movl	%edi, LABEL_EDI(%edx)
306	movl	%esp, 4(%edx)		/* saved %esp at hard-coded offset 4 */
307	movl	(%esp), %ecx		/* %eip (return address) */
308	movl	%ecx, (%edx)		/* LABEL_PC is 0 */
309	subl	%eax, %eax		/* return 0 */
310	ret
311	SET_SIZE(setjmp)
312
313	ENTRY(longjmp)
314	movl	4(%esp), %edx		/* address of save area */
315	movl	LABEL_EBP(%edx), %ebp
316	movl	LABEL_EBX(%edx), %ebx
317	movl	LABEL_ESI(%edx), %esi
318	movl	LABEL_EDI(%edx), %edi
319	movl	4(%edx), %esp		/* saved %esp (same offset 4 as setjmp) */
320	movl	(%edx), %ecx		/* %eip (return addr); LABEL_PC is 0 */
321	movl	$1, %eax		/* return 1 */
322	addl	$4, %esp		/* pop ret adr */
323	jmp	*%ecx			/* indirect */
324	SET_SIZE(longjmp)
325
326#endif	/* __i386 */
327#endif	/* __lint */
328
329/*
330 * if a() calls b() calls caller(),
331 * caller() returns return address in a().
332 * (Note: We assume a() and b() are C routines which do the normal entry/exit
333 *  sequence.)
 *
 * caller() builds no frame of its own, so on entry the frame pointer is
 * still b()'s; the word just above the saved frame pointer in b()'s frame
 * (8(%rbp) on amd64, 4(%ebp) on i386) is b()'s return address.
334 */
335
336#if defined(__lint)
337
338caddr_t
339caller(void)
340{ return (0); }
341
342#else	/* __lint */
343
344#if defined(__amd64)
345
346	ENTRY(caller)
347	movq	8(%rbp), %rax		/* b()'s return pc, in a() */
348	ret
349	SET_SIZE(caller)
350
351#elif defined(__i386)
352
353	ENTRY(caller)
354	movl	4(%ebp), %eax		/* b()'s return pc, in a() */
355	ret
356	SET_SIZE(caller)
357
358#endif	/* __i386 */
359#endif	/* __lint */
360
361/*
362 * if a() calls callee(), callee() returns the
363 * return address in a();
 *
 * Nothing is pushed before the load, so the word at the top of the stack
 * is callee()'s own return address.
364 */
365
366#if defined(__lint)
367
368caddr_t
369callee(void)
370{ return (0); }
371
372#else	/* __lint */
373
374#if defined(__amd64)
375
376	ENTRY(callee)
377	movq	(%rsp), %rax		/* callee()'s return pc, in a() */
378	ret
379	SET_SIZE(callee)
380
381#elif defined(__i386)
382
383	ENTRY(callee)
384	movl	(%esp), %eax		/* callee()'s return pc, in a() */
385	ret
386	SET_SIZE(callee)
387
388#endif	/* __i386 */
389#endif	/* __lint */
390
391/*
392 * return the current frame pointer
 *
 * getfp() sets up no frame of its own, so the value returned is whatever
 * %rbp / %ebp held in the caller at the time of the call.
393 */
394
395#if defined(__lint)
396
397greg_t
398getfp(void)
399{ return (0); }
400
401#else	/* __lint */
402
403#if defined(__amd64)
404
405	ENTRY(getfp)
406	movq	%rbp, %rax		/* return the caller's %rbp */
407	ret
408	SET_SIZE(getfp)
409
410#elif defined(__i386)
411
412	ENTRY(getfp)
413	movl	%ebp, %eax		/* return the caller's %ebp */
414	ret
415	SET_SIZE(getfp)
416
417#endif	/* __i386 */
418#endif	/* __lint */
419
420/*
421 * Invalidate a single page table entry in the TLB
 *
 * void mmu_tlbflush_entry(caddr_t m)
 *
 * Executes invlpg on the address m passed by the caller.
422 */
423
424#if defined(__lint)
425
426/* ARGSUSED */
427void
428mmu_tlbflush_entry(caddr_t m)
429{}
430
431#else	/* __lint */
432
433#if defined(__amd64)
434
435	ENTRY(mmu_tlbflush_entry)
436	invlpg	(%rdi)			/* %rdi = m */
437	ret
438	SET_SIZE(mmu_tlbflush_entry)
439
440#elif defined(__i386)
441
442	ENTRY(mmu_tlbflush_entry)
443	movl	4(%esp), %eax		/* %eax = m */
444	invlpg	(%eax)
445	ret
446	SET_SIZE(mmu_tlbflush_entry)
447
448#endif	/* __i386 */
449#endif	/* __lint */
450
451
452/*
453 * Get/Set the value of various control registers
 *
 * Each getcrN() returns the current contents of %crN and each setcrN(val)
 * loads val into %crN.  reload_cr3() writes %cr3 back with the value it
 * already holds.
 *
 * Build variations visible here:
 *  - under __xpv, getcr2() reads the value from the per-CPU vcpu info
 *    structure instead of %cr2, and setcr3()/reload_cr3() are not built;
 *  - getcr8()/setcr8() exist only in the amd64 build.
454 */
455
456#if defined(__lint)
457
458ulong_t
459getcr0(void)
460{ return (0); }
461
462/* ARGSUSED */
463void
464setcr0(ulong_t value)
465{}
466
467ulong_t
468getcr2(void)
469{ return (0); }
470
471ulong_t
472getcr3(void)
473{ return (0); }
474
475#if !defined(__xpv)
476/* ARGSUSED */
477void
478setcr3(ulong_t val)
479{}
480
481void
482reload_cr3(void)
483{}
484#endif
485
486ulong_t
487getcr4(void)
488{ return (0); }
489
490/* ARGSUSED */
491void
492setcr4(ulong_t val)
493{}
494
495#if defined(__amd64)
496
497ulong_t
498getcr8(void)
499{ return (0); }
500
501/* ARGSUSED */
502void
503setcr8(ulong_t val)
504{}
505
506#endif	/* __amd64 */
507
508#else	/* __lint */
509
510#if defined(__amd64)
511
512	ENTRY(getcr0)
513	movq	%cr0, %rax
514	ret
515	SET_SIZE(getcr0)
516
517	ENTRY(setcr0)
518	movq	%rdi, %cr0		/* %rdi = value */
519	ret
520	SET_SIZE(setcr0)
521
522        ENTRY(getcr2)
523#if defined(__xpv)
524	movq	%gs:CPU_VCPU_INFO, %rax		/* %rax = this CPU's vcpu info */
525	movq	VCPU_INFO_ARCH_CR2(%rax), %rax	/* %rax = its saved cr2 field */
526#else
527        movq    %cr2, %rax
528#endif
529        ret
530	SET_SIZE(getcr2)
531
532	ENTRY(getcr3)
533	movq    %cr3, %rax
534	ret
535	SET_SIZE(getcr3)
536
537#if !defined(__xpv)
538
539        ENTRY(setcr3)
540        movq    %rdi, %cr3
541        ret
542	SET_SIZE(setcr3)
543
	/* Rewrite %cr3 with its current value, using %rdi as scratch. */
544	ENTRY(reload_cr3)
545	movq	%cr3, %rdi
546	movq	%rdi, %cr3
547	ret
548	SET_SIZE(reload_cr3)
549
550#endif	/* __xpv */
551
552	ENTRY(getcr4)
553	movq	%cr4, %rax
554	ret
555	SET_SIZE(getcr4)
556
557	ENTRY(setcr4)
558	movq	%rdi, %cr4
559	ret
560	SET_SIZE(setcr4)
561
562	ENTRY(getcr8)
563	movq	%cr8, %rax
564	ret
565	SET_SIZE(getcr8)
566
567	ENTRY(setcr8)
568	movq	%rdi, %cr8
569	ret
570	SET_SIZE(setcr8)
571
572#elif defined(__i386)
573
574        ENTRY(getcr0)
575        movl    %cr0, %eax
576        ret
577	SET_SIZE(getcr0)
578
579        ENTRY(setcr0)
580        movl    4(%esp), %eax		/* %eax = value */
581        movl    %eax, %cr0
582        ret
583	SET_SIZE(setcr0)
584
585        ENTRY(getcr2)
586#if defined(__xpv)
587	movl	%gs:CPU_VCPU_INFO, %eax		/* %eax = this CPU's vcpu info */
588	movl	VCPU_INFO_ARCH_CR2(%eax), %eax	/* %eax = its saved cr2 field */
589#else
590        movl    %cr2, %eax
591#endif
592        ret
593	SET_SIZE(getcr2)
594
595	ENTRY(getcr3)
596	movl    %cr3, %eax
597	ret
598	SET_SIZE(getcr3)
599
600#if !defined(__xpv)
601
602        ENTRY(setcr3)
603        movl    4(%esp), %eax		/* %eax = val */
604        movl    %eax, %cr3
605        ret
606	SET_SIZE(setcr3)
607
	/* Rewrite %cr3 with its current value, using %eax as scratch. */
608	ENTRY(reload_cr3)
609	movl    %cr3, %eax
610	movl    %eax, %cr3
611	ret
612	SET_SIZE(reload_cr3)
613
614#endif	/* __xpv */
615
616	ENTRY(getcr4)
617	movl    %cr4, %eax
618	ret
619	SET_SIZE(getcr4)
620
621        ENTRY(setcr4)
622        movl    4(%esp), %eax		/* %eax = val */
623        movl    %eax, %cr4
624        ret
625	SET_SIZE(setcr4)
626
627#endif	/* __i386 */
628#endif	/* __lint */
629
630#if defined(__lint)
631
/*
 * uint32_t __cpuid_insn(struct cpuid_regs *regs)
 *
 * Loads %eax/%ebx/%ecx/%edx from the four 32-bit fields of *regs (at
 * offsets 0, 4, 8 and 12: cp_eax, cp_ebx, cp_ecx, cp_edx), executes cpuid,
 * and stores the four result registers back into the same fields.  The
 * value left in %eax on return is the post-cpuid %eax, i.e. the new
 * regs->cp_eax.
 */
632/*ARGSUSED*/
633uint32_t
634__cpuid_insn(struct cpuid_regs *regs)
635{ return (0); }
636
637#else	/* __lint */
638
639#if defined(__amd64)
640
641	ENTRY(__cpuid_insn)
642	movq	%rbx, %r8		/* stash %rbx, %rcx, %rdx in scratch */
643	movq	%rcx, %r9		/* registers; cpuid overwrites all */
644	movq	%rdx, %r11		/* three */
645	movl	(%rdi), %eax		/* %eax = regs->cp_eax */
646	movl	0x4(%rdi), %ebx		/* %ebx = regs->cp_ebx */
647	movl	0x8(%rdi), %ecx		/* %ecx = regs->cp_ecx */
648	movl	0xc(%rdi), %edx		/* %edx = regs->cp_edx */
649	cpuid
650	movl	%eax, (%rdi)		/* regs->cp_eax = %eax */
651	movl	%ebx, 0x4(%rdi)		/* regs->cp_ebx = %ebx */
652	movl	%ecx, 0x8(%rdi)		/* regs->cp_ecx = %ecx */
653	movl	%edx, 0xc(%rdi)		/* regs->cp_edx = %edx */
654	movq	%r8, %rbx		/* restore the stashed registers */
655	movq	%r9, %rcx
656	movq	%r11, %rdx
657	ret
658	SET_SIZE(__cpuid_insn)
659
660#elif defined(__i386)
661
662        ENTRY(__cpuid_insn)
663	pushl	%ebp
664	movl	0x8(%esp), %ebp		/* %ebp = regs */
665	pushl	%ebx			/* save the registers cpuid overwrites */
666	pushl	%ecx
667	pushl	%edx
668	movl	(%ebp), %eax		/* %eax = regs->cp_eax */
669	movl	0x4(%ebp), %ebx		/* %ebx = regs->cp_ebx */
670	movl	0x8(%ebp), %ecx		/* %ecx = regs->cp_ecx */
671	movl	0xc(%ebp), %edx		/* %edx = regs->cp_edx */
672	cpuid
673	movl	%eax, (%ebp)		/* regs->cp_eax = %eax */
674	movl	%ebx, 0x4(%ebp)		/* regs->cp_ebx = %ebx */
675	movl	%ecx, 0x8(%ebp)		/* regs->cp_ecx = %ecx */
676	movl	%edx, 0xc(%ebp)		/* regs->cp_edx = %edx */
677	popl	%edx			/* restore in reverse push order */
678	popl	%ecx
679	popl	%ebx
680	popl	%ebp
681	ret
682	SET_SIZE(__cpuid_insn)
683
684#endif	/* __i386 */
685#endif	/* __lint */
686
687#if defined(__xpv)
688	/*
689	 * Defined in C
690	 */
691#else
692
693#if defined(__lint)
694
/*
 * void i86_monitor(volatile uint32_t *addr, uint32_t extensions,
 *     uint32_t hints)
 *
 * Issues the MONITOR instruction with addr in %rax/%eax, extensions in
 * %rcx/%ecx and hints in %rdx/%edx.  The instruction is emitted as raw
 * opcode bytes (0x0f 0x01 0xc8) rather than by mnemonic.  Not built under
 * __xpv, where the enclosing conditional says it is defined in C.
 */
695/*ARGSUSED*/
696void
697i86_monitor(volatile uint32_t *addr, uint32_t extensions, uint32_t hints)
698{ return; }
699
700#else   /* __lint */
701
702#if defined(__amd64)
703
704	ENTRY_NP(i86_monitor)
705	pushq	%rbp			/* set up a frame by hand (ENTRY_NP) */
706	movq	%rsp, %rbp
707	movq	%rdi, %rax		/* addr */
708	movq	%rsi, %rcx		/* extensions */
709	/* rdx contains input arg3: hints */
710	.byte	0x0f, 0x01, 0xc8	/* monitor */
711	leave
712	ret
713	SET_SIZE(i86_monitor)
714
715#elif defined(__i386)
716
717ENTRY_NP(i86_monitor)
718	pushl	%ebp			/* set up a frame by hand (ENTRY_NP) */
719	movl	%esp, %ebp
720	movl	0x8(%ebp),%eax		/* addr */
721	movl	0xc(%ebp),%ecx		/* extensions */
722	movl	0x10(%ebp),%edx		/* hints */
723	.byte	0x0f, 0x01, 0xc8	/* monitor */
724	leave
725	ret
726	SET_SIZE(i86_monitor)
727
728#endif	/* __i386 */
729#endif	/* __lint */
730
731#if defined(__lint)
732
/*
 * void i86_mwait(uint32_t data, uint32_t extensions)
 *
 * Issues the MWAIT instruction with data in %rax/%eax and extensions in
 * %rcx/%ecx.  The instruction is emitted as raw opcode bytes
 * (0x0f 0x01 0xc9) rather than by mnemonic.
 */
733/*ARGSUSED*/
734void
735i86_mwait(uint32_t data, uint32_t extensions)
736{ return; }
737
738#else	/* __lint */
739
740#if defined(__amd64)
741
742	ENTRY_NP(i86_mwait)
743	pushq	%rbp			/* set up a frame by hand (ENTRY_NP) */
744	movq	%rsp, %rbp
745	movq	%rdi, %rax		/* data */
746	movq	%rsi, %rcx		/* extensions */
747	.byte	0x0f, 0x01, 0xc9	/* mwait */
748	leave
749	ret
750	SET_SIZE(i86_mwait)
751
752#elif defined(__i386)
753
754	ENTRY_NP(i86_mwait)
755	pushl	%ebp			/* set up a frame by hand (ENTRY_NP) */
756	movl	%esp, %ebp
757	movl	0x8(%ebp),%eax		/* data */
758	movl	0xc(%ebp),%ecx		/* extensions */
759	.byte	0x0f, 0x01, 0xc9	/* mwait */
760	leave
761	ret
762	SET_SIZE(i86_mwait)
763
764#endif	/* __i386 */
765#endif	/* __lint */
766
767#if defined(__lint)
768
/*
 * hrtime_t tsc_read(void)
 *
 * Returns the 64-bit time-stamp counter.  On amd64 the two halves that
 * rdtsc leaves in %edx:%eax are merged into %rax; on i386 the value is
 * returned in %edx:%eax as is.
 *
 * The default body executes cpuid (leaf 0) immediately before rdtsc.
 * Three alternative bodies follow the first ret, each bracketed by a
 * pair of exported labels and still inside tsc_read's SET_SIZE extent:
 *	_tsc_mfence_start .. _tsc_mfence_end	mfence + rdtsc
 *	_tscp_start .. _tscp_end		rdtscp (raw opcode bytes)
 *	_no_rdtsc_start .. _no_rdtsc_end	always returns 0
 * NOTE(review): nothing in this file branches to those labels; presumably
 * other code copies one of these bodies over the entry point depending on
 * CPU features -- confirm against the users of these symbols before
 * changing any instruction size or order here.
 */
769hrtime_t
770tsc_read(void)
771{
772	return (0);
773}
774
775#else	/* __lint */
776
777#if defined(__amd64)
778
779	ENTRY_NP(tsc_read)
780	movq	%rbx, %r11		/* cpuid overwrites %rbx; keep a copy */
781	movl	$0, %eax		/* cpuid leaf 0 */
782	cpuid
783	rdtsc				/* %edx:%eax = TSC */
784	movq	%r11, %rbx		/* restore %rbx */
785	shlq	$32, %rdx		/* move high half into bits 63:32 */
786	orq	%rdx, %rax		/* %rax = full 64-bit TSC */
787	ret
788	.globl _tsc_mfence_start
789_tsc_mfence_start:
790	mfence
791	rdtsc
792	shlq	$32, %rdx
793	orq	%rdx, %rax
794	ret
795	.globl _tsc_mfence_end
796_tsc_mfence_end:
797	.globl _tscp_start
798_tscp_start:
799	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
800	shlq	$32, %rdx
801	orq	%rdx, %rax
802	ret
803	.globl _tscp_end
804_tscp_end:
805	.globl _no_rdtsc_start
806_no_rdtsc_start:
807	xorl	%edx, %edx		/* no usable TSC: return 0 */
808	xorl	%eax, %eax
809	ret
810	.globl _no_rdtsc_end
811_no_rdtsc_end:
812	SET_SIZE(tsc_read)
813
814#else /* __i386 */
815
816	ENTRY_NP(tsc_read)
817	pushl	%ebx			/* cpuid overwrites %ebx */
818	movl	$0, %eax		/* cpuid leaf 0 */
819	cpuid
820	rdtsc				/* %edx:%eax = TSC (the return value) */
821	popl	%ebx
822	ret
823	.globl _tsc_mfence_start
824_tsc_mfence_start:
825	mfence
826	rdtsc
827	ret
828	.globl _tsc_mfence_end
829_tsc_mfence_end:
830	.globl	_tscp_start
831_tscp_start:
832	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
833	ret
834	.globl _tscp_end
835_tscp_end:
836	.globl _no_rdtsc_start
837_no_rdtsc_start:
838	xorl	%edx, %edx		/* no usable TSC: return 0 */
839	xorl	%eax, %eax
840	ret
841	.globl _no_rdtsc_end
842_no_rdtsc_end:
843	SET_SIZE(tsc_read)
844
845#endif	/* __i386 */
846
847#endif	/* __lint */
848
849#endif	/* __xpv */
850
851/*
852 * Insert entryp after predp in a doubly linked list.
 *
 * void _insque(caddr_t entryp, caddr_t predp)
 *
 * Both arguments point at list elements whose first pointer-sized word is
 * the forward link and whose second word (at offset CPTRSIZE) is the back
 * link.  predp->forw is read once, before any store, and that saved value
 * is what receives the final back-link update.
853 */
854
855#if defined(__lint)
856
857/*ARGSUSED*/
858void
859_insque(caddr_t entryp, caddr_t predp)
860{}
861
862#else	/* __lint */
863
864#if defined(__amd64)
865
866	ENTRY(_insque)
867	movq	(%rsi), %rax		/* predp->forw 			*/
868	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp		*/
869	movq	%rax, (%rdi)		/* entryp->forw = predp->forw	*/
870	movq	%rdi, (%rsi)		/* predp->forw = entryp		*/
871	movq	%rdi, CPTRSIZE(%rax)	/* predp->forw->back = entryp	*/
872	ret
873	SET_SIZE(_insque)
874
875#elif defined(__i386)
876
877	ENTRY(_insque)
878	movl	8(%esp), %edx		/* %edx = predp			*/
879	movl	4(%esp), %ecx		/* %ecx = entryp		*/
880	movl	(%edx), %eax		/* predp->forw			*/
881	movl	%edx, CPTRSIZE(%ecx)	/* entryp->back = predp		*/
882	movl	%eax, (%ecx)		/* entryp->forw = predp->forw	*/
883	movl	%ecx, (%edx)		/* predp->forw = entryp		*/
884	movl	%ecx, CPTRSIZE(%eax)	/* predp->forw->back = entryp	*/
885	ret
886	SET_SIZE(_insque)
887
888#endif	/* __i386 */
889#endif	/* __lint */
890
891/*
892 * Remove entryp from a doubly linked list
 *
 * void _remque(caddr_t entryp)
 *
 * Links entryp's neighbours to each other (forward link at offset 0, back
 * link at offset CPTRSIZE).  entryp's own two link words are read but not
 * modified, and neither neighbour pointer is checked for NULL.
893 */
894
895#if defined(__lint)
896
897/*ARGSUSED*/
898void
899_remque(caddr_t entryp)
900{}
901
902#else	/* __lint */
903
904#if defined(__amd64)
905
906	ENTRY(_remque)
907	movq	(%rdi), %rax		/* entry->forw */
908	movq	CPTRSIZE(%rdi), %rdx	/* entry->back */
909	movq	%rax, (%rdx)		/* entry->back->forw = entry->forw */
910	movq	%rdx, CPTRSIZE(%rax)	/* entry->forw->back = entry->back */
911	ret
912	SET_SIZE(_remque)
913
914#elif defined(__i386)
915
916	ENTRY(_remque)
917	movl	4(%esp), %ecx		/* %ecx = entryp */
918	movl	(%ecx), %eax		/* entry->forw */
919	movl	CPTRSIZE(%ecx), %edx	/* entry->back */
920	movl	%eax, (%edx)		/* entry->back->forw = entry->forw */
921	movl	%edx, CPTRSIZE(%eax)	/* entry->forw->back = entry->back */
922	ret
923	SET_SIZE(_remque)
924
925#endif	/* __i386 */
926#endif	/* __lint */
927
928/*
929 * Returns the number of
930 * non-NUL bytes in string argument.
 *
 * size_t strlen(const char *str)
 *
 * i.e. the count of bytes preceding the terminating NUL.  In DEBUG builds
 * the routine first panics if str lies below postbootkernelbase.
931 */
932
933#if defined(__lint)
934
935/* ARGSUSED */
936size_t
937strlen(const char *str)
938{ return (0); }
939
940#else	/* __lint */
941
942#if defined(__amd64)
943
944/*
945 * This is close to a simple transliteration of a C version of this
946 * routine.  We should either just -make- this be a C version, or
947 * justify having it in assembler by making it significantly faster.
948 *
949 * size_t
950 * strlen(const char *s)
951 * {
952 *	const char *s0;
953 * #if defined(DEBUG)
954 *	if ((uintptr_t)s < postbootkernelbase)
955 *		panic(.str_panic_msg);
956 * #endif
957 *	for (s0 = s; *s; s++)
958 *		;
959 *	return (s - s0);
960 * }
961 */
962
963	ENTRY(strlen)
964#ifdef DEBUG
965	movq	postbootkernelbase(%rip), %rax
966	cmpq	%rax, %rdi		/* unsigned: s >= postbootkernelbase? */
967	jae	str_valid
968	pushq	%rbp			/* build a frame for the panic call */
969	movq	%rsp, %rbp
970	leaq	.str_panic_msg(%rip), %rdi
971	xorl	%eax, %eax		/* panic is variadic: no vector args */
972	call	panic
973#endif	/* DEBUG */
974str_valid:
975	cmpb	$0, (%rdi)		/* flags survive the mov below */
976	movq	%rdi, %rax		/* %rax = s0 (start of string) */
977	je	.null_found		/* empty string */
978	.align	4
979.strlen_loop:
980	incq	%rdi			/* s++ */
981	cmpb	$0, (%rdi)
982	jne	.strlen_loop
983.null_found:
984	subq	%rax, %rdi		/* %rdi = s - s0 */
985	movq	%rdi, %rax		/* return the length */
986	ret
987	SET_SIZE(strlen)
988
989#elif defined(__i386)
990
	/*
	 * The i386 version scans byte by byte until the pointer is 4-byte
	 * aligned, then examines one 32-bit word per iteration.  For each
	 * byte b of the word, ((b & 0x7f) + 0x7f) | b has bit 7 set exactly
	 * when b != 0, so after masking with 0x80808080 the result equals
	 * 0x80808080 only if none of the four bytes is zero.  When a word
	 * does contain a zero byte, the byte loop locates it.
	 */
991	ENTRY(strlen)
992#ifdef DEBUG
993	movl	postbootkernelbase, %eax
994	cmpl	%eax, 4(%esp)		/* unsigned: str >= postbootkernelbase? */
995	jae	str_valid
996	pushl	%ebp			/* build a frame for the panic call */
997	movl	%esp, %ebp
998	pushl	$.str_panic_msg
999	call	panic
1000#endif /* DEBUG */
1001
1002str_valid:
1003	movl	4(%esp), %eax		/* %eax = string address */
1004	testl	$3, %eax		/* if %eax not word aligned */
1005	jnz	.not_word_aligned	/* goto .not_word_aligned */
1006	.align	4
1007.word_aligned:
1008	movl	(%eax), %edx		/* move 1 word from (%eax) to %edx */
1009	movl	$0x7f7f7f7f, %ecx
1010	andl	%edx, %ecx		/* %ecx = %edx & 0x7f7f7f7f */
1011	addl	$4, %eax		/* next word */
1012	addl	$0x7f7f7f7f, %ecx	/* %ecx += 0x7f7f7f7f */
1013	orl	%edx, %ecx		/* %ecx |= %edx */
1014	andl	$0x80808080, %ecx	/* %ecx &= 0x80808080 */
1015	cmpl	$0x80808080, %ecx	/* if no null byte in this word */
1016	je	.word_aligned		/* goto .word_aligned */
1017	subl	$4, %eax		/* post-incremented */
1018.not_word_aligned:
1019	cmpb	$0, (%eax)		/* if a byte in (%eax) is null */
1020	je	.null_found		/* goto .null_found */
1021	incl	%eax			/* next byte */
1022	testl	$3, %eax		/* if %eax not word aligned */
1023	jnz	.not_word_aligned	/* goto .not_word_aligned */
1024	jmp	.word_aligned		/* goto .word_aligned */
1025	.align	4
1026.null_found:
1027	subl	4(%esp), %eax		/* %eax -= string address */
1028	ret
1029	SET_SIZE(strlen)
1030
1031#endif	/* __i386 */
1032
1033#ifdef DEBUG
1034	.text
1035.str_panic_msg:
1036	.string "strlen: argument below kernelbase"
1037#endif /* DEBUG */
1038
1039#endif	/* __lint */
1040
1041	/*
1042	 * Berkeley 4.3 introduced symbolically named interrupt levels
1043	 * as a way to deal with priority in a machine-independent fashion.
1044	 * Numbered priorities are machine specific, and should be
1045	 * discouraged where possible.
1046	 *
1047	 * Note, for the machine specific priorities there are
1048	 * examples listed for devices that use a particular priority.
1049	 * It should not be construed that all devices of that
1050	 * type should be at that priority.  It is currently where
1051	 * the current devices fit into the priority scheme based
1052	 * upon time criticality.
1053	 *
1054	 * The underlying assumption of these assignments is that
1055	 * IPL 10 is the highest level from which a device
1056	 * routine can call wakeup.  Devices that interrupt from higher
1057	 * levels are restricted in what they can do.  If they need
1058	 * kernel services they should schedule a routine at a lower
1059	 * level (via software interrupt) to do the required
1060	 * processing.
1061	 *
1062	 * Examples of this higher usage:
1063	 *	Level	Usage
1064	 *	14	Profiling clock (and PROM uart polling clock)
1065	 *	12	Serial ports
1066	 *
1067	 * The serial ports request lower level processing on level 6.
1068	 *
1069	 * Also, almost all splN routines (where N is a number or a
1070	 * mnemonic) will do a RAISE(), on the assumption that they are
1071	 * never used to lower our priority.
1072	 * The exceptions are:
1073	 *	spl8()		Because you can't be above 15 to begin with!
1074	 *	splzs()		Because this is used at boot time to lower our
1075	 *			priority, to allow the PROM to poll the uart.
1076	 *	spl0()		Used to lower priority to 0.
1077	 */
1078
1079#if defined(__lint)
1080
/*
 * Priority-level entry points.  Each splN routine loads a fixed level and
 * hands it to one of two common routines defined outside this block:
 * do_splx (via SETPRI) or splr (via RAISE).  splx(level) forwards its
 * caller-supplied level to do_splx.  The value each routine returns is
 * whatever do_splx / splr leave in the return register.
 *
 *	spl8			SETPRI(15)
 *	spl7			RAISE(13)
 *	splzs			SETPRI(12)
 *	splhi, splhigh, spl6,
 *	i_ddi_splhigh		RAISE(DISP_LEVEL)  (one body, four names)
 *	spl0			SETPRI(0)
 */
1081int spl0(void)		{ return (0); }
1082int spl6(void)		{ return (0); }
1083int spl7(void)		{ return (0); }
1084int spl8(void)		{ return (0); }
1085int splhigh(void)	{ return (0); }
1086int splhi(void)		{ return (0); }
1087int splzs(void)		{ return (0); }
1088
1089/* ARGSUSED */
1090void
1091splx(int level)
1092{}
1093
1094#else	/* __lint */
1095
/*
 * SETPRI(level) / RAISE(level) expand to a complete routine body.
 * On amd64 the level goes in %edi and control tail-jumps to the common
 * routine, whose ret returns to the splN caller.  On i386 the level is
 * pushed, the common routine is called, the argument is unstacked and the
 * macro itself supplies the ret.  The empty comment in "$/ ** /level"
 * (written without the spaces) separates the "$" from the macro argument;
 * NOTE(review): presumably needed so a traditional cpp substitutes the
 * argument directly after the "$" -- confirm before reformatting.
 */
1096#if defined(__amd64)
1097
1098#define	SETPRI(level) \
1099	movl	$/**/level, %edi;	/* new priority */		\
1100	jmp	do_splx			/* redirect to do_splx */
1101
1102#define	RAISE(level) \
1103	movl	$/**/level, %edi;	/* new priority */		\
1104	jmp	splr			/* redirect to splr */
1105
1106#elif defined(__i386)
1107
1108#define	SETPRI(level) \
1109	pushl	$/**/level;	/* new priority */			\
1110	call	do_splx;	/* invoke common splx code */		\
1111	addl	$4, %esp;	/* unstack arg */			\
1112	ret
1113
1114#define	RAISE(level) \
1115	pushl	$/**/level;	/* new priority */			\
1116	call	splr;		/* invoke common splr code */		\
1117	addl	$4, %esp;	/* unstack args */			\
1118	ret
1119
1120#endif	/* __i386 */
1121
1122	/* locks out all interrupts, including memory errors */
1123	ENTRY(spl8)
1124	SETPRI(15)
1125	SET_SIZE(spl8)
1126
1127	/* just below the level that profiling runs */
1128	ENTRY(spl7)
1129	RAISE(13)
1130	SET_SIZE(spl7)
1131
1132	/* sun specific - highest priority onboard serial i/o asy ports */
1133	ENTRY(splzs)
1134	SETPRI(12)	/* Can't be a RAISE, as it's used to lower us */
1135	SET_SIZE(splzs)
1136
	/* four names for one body: raise to DISP_LEVEL */
1137	ENTRY(splhi)
1138	ALTENTRY(splhigh)
1139	ALTENTRY(spl6)
1140	ALTENTRY(i_ddi_splhigh)
1141
1142	RAISE(DISP_LEVEL)
1143
1144	SET_SIZE(i_ddi_splhigh)
1145	SET_SIZE(spl6)
1146	SET_SIZE(splhigh)
1147	SET_SIZE(splhi)
1148
1149	/* allow all interrupts */
1150	ENTRY(spl0)
1151	SETPRI(0)
1152	SET_SIZE(spl0)
1153
1154
1155	/* splx implementation: the caller's level argument is passed through */
1156	ENTRY(splx)
1157	jmp	do_splx		/* redirect to common splx code */
1158	SET_SIZE(splx)
1159
1160#endif	/* __lint */
1161
1162#if defined(__i386)
1163
1164/*
1165 * Read and write the %gs register
 *
 * uint16_t getgs(void)		returns the current %gs selector
 * void setgs(uint16_t sel)	loads sel into %gs
 *
 * Built for i386 only.
1166 */
1167
1168#if defined(__lint)
1169
1170/*ARGSUSED*/
1171uint16_t
1172getgs(void)
1173{ return (0); }
1174
1175/*ARGSUSED*/
1176void
1177setgs(uint16_t sel)
1178{}
1179
1180#else	/* __lint */
1181
1182	ENTRY(getgs)
1183	clr	%eax			/* clear %eax before the 16-bit move */
1184	movw	%gs, %ax		/* %ax = %gs selector */
1185	ret
1186	SET_SIZE(getgs)
1187
1188	ENTRY(setgs)
1189	movw	4(%esp), %gs		/* %gs = sel */
1190	ret
1191	SET_SIZE(setgs)
1192
1193#endif	/* __lint */
1194#endif	/* __i386 */
1195
1196#if defined(__lint)
1197
/*
 * lint stubs for the two reset entry points implemented in assembly
 * below.  Both take no arguments; the assembly versions end in a halt
 * and are marked NOTREACHED, i.e. they are not expected to return.
 */
1198void
1199pc_reset(void)
1200{}
1201
1202void
1203efi_reset(void)
1204{}
1205
1206#else	/* __lint */
1207
1208	ENTRY(wait_500ms)
	/*
	 * void wait_500ms(void)
	 *
	 * Busy-wait by calling tenmicrosec() 50000 times
	 * (50000 * 10us = 500ms).  The loop counter is kept in %ebx, which
	 * must survive each call, so the caller's value is saved around
	 * the loop.  The save/restore must use the native register width:
	 * a 32-bit push/pop of %ebx cannot be encoded in 64-bit mode, so
	 * amd64 saves %rbx instead (which also leaves %rsp 16-byte aligned
	 * at the call).
	 */
#if defined(__amd64)
	pushq	%rbx			/* save caller's %rbx */
#elif defined(__i386)
1209	push	%ebx
#endif
1210	movl	$50000, %ebx
12111:
1212	call	tenmicrosec
1213	decl	%ebx
1214	jnz	1b
#if defined(__amd64)
	popq	%rbx			/* restore caller's %rbx */
#elif defined(__i386)
1215	pop	%ebx
#endif
1216	ret
1217	SET_SIZE(wait_500ms)
1218
1219#define	RESET_METHOD_KBC	1
1220#define	RESET_METHOD_PORT92	2
1221#define RESET_METHOD_PCI	4
1222
/*
 * pc_reset_methods is a 4-byte global bitmask selecting which of the
 * reset methods above pc_reset() will attempt; all three are enabled in
 * the initial value.
 *
 * void pc_reset(void)
 *	Tries, in order and only if the corresponding bit is set:
 *	  1. keyboard controller: write 0xfe to port 0x64;
 *	  2. port 0x92: set bit 0 (clearing it first if it was already
 *	     set); skipped without a write if the port reads back 0xff;
 *	  3. port 0xcf9: write 0x2, then 0x6.
 *	Each attempt is followed by wait_500ms().  If the machine is still
 *	running afterwards, control falls through into efi_reset.
 *
 * void efi_reset(void)
 *	An entry point nested inside pc_reset.  Loads an IDT descriptor
 *	made of zero bytes pushed on the stack, raises interrupt 0, and
 *	then disables interrupts and halts.
 */
1223	DGDEF3(pc_reset_methods, 4, 8)
1224	.long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI;
1225
1226	ENTRY(pc_reset)
1227
1228#if defined(__i386)
1229	testl	$RESET_METHOD_KBC, pc_reset_methods
1230#elif defined(__amd64)
1231	testl	$RESET_METHOD_KBC, pc_reset_methods(%rip)
1232#endif
1233	jz	1f			/* method disabled: skip it */
1234
1235	/
1236	/ Try the classic keyboard controller-triggered reset.
1237	/
1238	movw	$0x64, %dx
1239	movb	$0xfe, %al
1240	outb	(%dx)			/* write 0xfe to port 0x64 */
1241
1242	/ Wait up to 500 milliseconds here for the keyboard controller
1243	/ to pull the reset line.  On some systems where the keyboard
1244	/ controller is slow to pull the reset line, the next reset method
1245	/ may be executed (which may be bad if those systems hang when the
1246	/ next reset method is used, e.g. Ferrari 3400 (doesn't like port 92),
1247	/ and Ferrari 4000 (doesn't like the cf9 reset method))
1248
1249	call	wait_500ms
1250
12511:
1252#if defined(__i386)
1253	testl	$RESET_METHOD_PORT92, pc_reset_methods
1254#elif defined(__amd64)
1255	testl	$RESET_METHOD_PORT92, pc_reset_methods(%rip)
1256#endif
1257	jz	3f			/* method disabled: skip it */
1258
1259	/
1260	/ Try port 0x92 fast reset
1261	/
1262	movw	$0x92, %dx
1263	inb	(%dx)
1264	cmpb	$0xff, %al	/ If port's not there, we should get back 0xFF
1265	je	1f
1266	testb	$1, %al		/ If bit 0
1267	jz	2f		/ is clear, jump to perform the reset
1268	andb	$0xfe, %al	/ otherwise,
1269	outb	(%dx)		/ clear bit 0 first, then
12702:
1271	orb	$1, %al		/ Set bit 0
1272	outb	(%dx)		/ and reset the system
12731:
1274
1275	call	wait_500ms
1276
12773:
1278#if defined(__i386)
1279	testl	$RESET_METHOD_PCI, pc_reset_methods
1280#elif defined(__amd64)
1281	testl	$RESET_METHOD_PCI, pc_reset_methods(%rip)
1282#endif
1283	jz	4f			/* method disabled: skip it */
1284
1285	/ Try the PCI (soft) reset vector (should work on all modern systems,
1286	/ but has been shown to cause problems on 450NX systems, and some newer
1287	/ systems (e.g. ATI IXP400-equipped systems))
1288	/ When resetting via this method, 2 writes are required.  The first
1289	/ targets bit 1 (0=hard reset without power cycle, 1=hard reset with
1290	/ power cycle).
1291	/ The reset occurs on the second write, during bit 2's transition from
1292	/ 0->1.
1293	movw	$0xcf9, %dx
1294	movb	$0x2, %al	/ Reset mode = hard, no power cycle
1295	outb	(%dx)
1296	movb	$0x6, %al
1297	outb	(%dx)
1298
1299	call	wait_500ms
1300
13014:
1302	/
1303	/ port 0xcf9 failed also.  Last-ditch effort is to
1304	/ triple-fault the CPU.
1305	/ Also, use triple fault for EFI firmware
1306	/
1307	ENTRY(efi_reset)
1308#if defined(__amd64)
1309	pushq	$0x0
1310	pushq	$0x0		/ IDT base of 0, limit of 0 + 2 unused bytes
1311	lidt	(%rsp)
1312#elif defined(__i386)
1313	pushl	$0x0
1314	pushl	$0x0		/ IDT base of 0, limit of 0 + 2 unused bytes
1315	lidt	(%esp)
1316#endif
1317	int	$0x0		/ Trigger interrupt, generate triple-fault
1318
1319	cli
1320	hlt			/ Wait forever
1321	/*NOTREACHED*/
1322	SET_SIZE(efi_reset)
1323	SET_SIZE(pc_reset)
1324
1325#endif	/* __lint */
1326
1327/*
1328 * C callable in and out routines
 *
 * void outl(int port_address, uint32_t val)
 *	Writes the 32-bit val to I/O port port_address.  Only the low
 *	16 bits of port_address are used (they are moved into %dx).
 *
 * The i386 version also defines the stack-offset symbols PORT and VAL,
 * which the other i386 in/out routines in this file reuse.
1329 */
1330
1331#if defined(__lint)
1332
1333/* ARGSUSED */
1334void
1335outl(int port_address, uint32_t val)
1336{}
1337
1338#else	/* __lint */
1339
1340#if defined(__amd64)
1341
1342	ENTRY(outl)
1343	movw	%di, %dx		/* %dx = port */
1344	movl	%esi, %eax		/* %eax = val */
1345	outl	(%dx)
1346	ret
1347	SET_SIZE(outl)
1348
1349#elif defined(__i386)
1350
1351	.set	PORT, 4			/* offset of 1st argument from %esp */
1352	.set	VAL, 8			/* offset of 2nd argument from %esp */
1353
1354	ENTRY(outl)
1355	movw	PORT(%esp), %dx		/* %dx = port */
1356	movl	VAL(%esp), %eax		/* %eax = val */
1357	outl	(%dx)
1358	ret
1359	SET_SIZE(outl)
1360
1361#endif	/* __i386 */
1362#endif	/* __lint */
1363
1364#if defined(__lint)
1365
/*
 * void outw(int port_address, uint16_t val)
 *
 * Writes the 16-bit val to I/O port port_address (low 16 bits used).
 * The instruction is written as "D16 outl": the outl mnemonic preceded by
 * the D16 macro.  NOTE(review): D16 is not defined in this file;
 * presumably it emits the operand-size prefix that turns the 32-bit form
 * into the 16-bit one -- confirm in the included asm headers.
 */
1366/* ARGSUSED */
1367void
1368outw(int port_address, uint16_t val)
1369{}
1370
1371#else	/* __lint */
1372
1373#if defined(__amd64)
1374
1375	ENTRY(outw)
1376	movw	%di, %dx		/* %dx = port */
1377	movw	%si, %ax		/* %ax = val */
1378	D16 outl (%dx)		/* XX64 why not outw? */
1379	ret
1380	SET_SIZE(outw)
1381
1382#elif defined(__i386)
1383
	/* PORT and VAL are the .set stack offsets defined with i386 outl */
1384	ENTRY(outw)
1385	movw	PORT(%esp), %dx		/* %dx = port */
1386	movw	VAL(%esp), %ax		/* %ax = val */
1387	D16 outl (%dx)
1388	ret
1389	SET_SIZE(outw)
1390
1391#endif	/* __i386 */
1392#endif	/* __lint */
1393
1394#if defined(__lint)
1395
/*
 * void outb(int port_address, uint8_t val)
 *
 * Writes the 8-bit val to I/O port port_address (low 16 bits used).
 */
1396/* ARGSUSED */
1397void
1398outb(int port_address, uint8_t val)
1399{}
1400
1401#else	/* __lint */
1402
1403#if defined(__amd64)
1404
1405	ENTRY(outb)
1406	movw	%di, %dx		/* %dx = port */
1407	movb	%sil, %al		/* %al = val */
1408	outb	(%dx)
1409	ret
1410	SET_SIZE(outb)
1411
1412#elif defined(__i386)
1413
	/* PORT and VAL are the .set stack offsets defined with i386 outl */
1414	ENTRY(outb)
1415	movw	PORT(%esp), %dx		/* %dx = port */
1416	movb	VAL(%esp), %al		/* %al = val */
1417	outb	(%dx)
1418	ret
1419	SET_SIZE(outb)
1420
1421#endif	/* __i386 */
1422#endif	/* __lint */
1423
#if defined(__lint)

/* ARGSUSED */
uint32_t
inl(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint32_t inl(int port_address)
 *
 * Read a 32-bit quantity from I/O port port_address and return it.
 */

#if defined(__amd64)

	ENTRY(inl)
	movw	%di, %dx		/* %dx = port_address */
	xorl	%eax, %eax
	inl	(%dx)			/* %eax = value read */
	ret
	SET_SIZE(inl)

#elif defined(__i386)

	ENTRY(inl)
	movw	PORT(%esp), %dx		/* %dx = port_address */
	inl	(%dx)			/* %eax = value read */
	ret
	SET_SIZE(inl)

#endif	/* __i386 */
#endif	/* __lint */
1452
#if defined(__lint)

/* ARGSUSED */
uint16_t
inw(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint16_t inw(int port_address)
 *
 * Read a 16-bit quantity from I/O port port_address.  %eax is cleared
 * first so the word read into %ax is returned zero-extended.
 */

#if defined(__amd64)

	ENTRY(inw)
	movw	%di, %dx		/* %dx = port_address */
	xorl	%eax, %eax		/* upper bits of the result = 0 */
	D16 inl	(%dx)			/* %ax = value read */
	ret
	SET_SIZE(inw)

#elif defined(__i386)

	ENTRY(inw)
	movw	PORT(%esp), %dx		/* %dx = port_address */
	xorl	%eax, %eax		/* upper bits of the result = 0 */
	D16 inl	(%dx)			/* %ax = value read */
	ret
	SET_SIZE(inw)

#endif	/* __i386 */
#endif	/* __lint */
1482
1483
#if defined(__lint)

/* ARGSUSED */
uint8_t
inb(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint8_t inb(int port_address)
 *
 * Read one byte from I/O port port_address.  %eax is cleared first so
 * the byte read into %al is returned zero-extended.
 */

#if defined(__amd64)

	ENTRY(inb)
	movw	%di, %dx		/* %dx = port_address */
	xorl	%eax, %eax		/* upper bits of the result = 0 */
	inb	(%dx)			/* %al = value read */
	ret
	SET_SIZE(inb)

#elif defined(__i386)

	ENTRY(inb)
	movw	PORT(%esp), %dx		/* %dx = port_address */
	xorl	%eax, %eax		/* upper bits of the result = 0 */
	inb	(%dx)			/* %al = value read */
	ret
	SET_SIZE(inb)

#endif	/* __i386 */
#endif	/* __lint */
1513
1514
#if defined(__lint)

/* ARGSUSED */
void
repoutsw(int port, uint16_t *addr, int cnt)
{}

#else	/* __lint */

/*
 * void repoutsw(int port, uint16_t *addr, int cnt)
 *
 * Write cnt 16-bit words, read consecutively starting at addr, to I/O
 * port `port' using a repeated word-sized OUTS (D16 prefix on outsl).
 */

#if defined(__amd64)

	ENTRY(repoutsw)
	movl	%edx, %ecx		/* %ecx = cnt; must precede the */
	movw	%di, %dx		/* write of port into %dx */
					/* addr is already in %rsi */
	rep
	  D16 outsl
	ret
	SET_SIZE(repoutsw)

#elif defined(__i386)

	/*
	 * Stack layout once %esi has been pushed:
	 *
	 *	+16	cnt
	 *	+12	addr
	 *	 +8	port
	 *	 +4	return address
	 *	 +0	saved %esi	<-- %esp
	 *
	 * The offsets below must be adjusted if anything else is pushed.
	 */
	.set	PORT, 8
	.set	ADDR, 12
	.set	COUNT, 16

	ENTRY(repoutsw)
	pushl	%esi			/* %esi is callee-saved */
	movl	COUNT(%esp), %ecx	/* %ecx = cnt */
	movl	ADDR(%esp), %esi	/* %esi = addr */
	movl	PORT(%esp), %edx	/* %edx = port */
	rep
	  D16 outsl
	popl	%esi
	ret
	SET_SIZE(repoutsw)

#endif	/* __i386 */
#endif	/* __lint */
1564
1565
#if defined(__lint)

/* ARGSUSED */
void
repinsw(int port_addr, uint16_t *addr, int cnt)
{}

#else	/* __lint */

/*
 * void repinsw(int port_addr, uint16_t *addr, int cnt)
 *
 * Read cnt 16-bit words from I/O port port_addr and store them
 * consecutively starting at addr, using a repeated word-sized INS
 * (D16 prefix on insl).
 */

#if defined(__amd64)

	ENTRY(repinsw)
	movl	%edx, %ecx		/* %ecx = cnt */
	movw	%di, %dx		/* %dx = port_addr; taken before */
					/* %rdi is overwritten below */
	movq	%rsi, %rdi		/* INS stores through %rdi, so it */
					/* must hold addr, not the port */
	rep
	  D16 insl
	ret
	SET_SIZE(repinsw)

#elif defined(__i386)

	/* PORT, ADDR and COUNT are the offsets valid after one push. */
	ENTRY(repinsw)
	pushl	%edi			/* %edi is callee-saved */
	movl	PORT(%esp), %edx	/* %edx = port_addr */
	movl	ADDR(%esp), %edi	/* %edi = addr (INS destination) */
	movl	COUNT(%esp), %ecx	/* %ecx = cnt */
	rep
	  D16 insl
	popl	%edi
	ret
	SET_SIZE(repinsw)

#endif	/* __i386 */
#endif	/* __lint */
1600
1601
#if defined(__lint)

/* ARGSUSED */
void
repinsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repinsb(int port, uint8_t *addr, int count)
 *
 * Read count bytes from I/O port `port' and store them consecutively
 * starting at addr.
 */

#if defined(__amd64)

	ENTRY(repinsb)
	movl	%edx, %ecx		/* %ecx = count */
	movw	%di, %dx		/* %dx = port (before %rdi changes) */
	movq	%rsi, %rdi		/* %rdi = addr (INS destination) */
	rep
	  insb
	ret
	SET_SIZE(repinsb)

#elif defined(__i386)

	/*
	 * Stack layout once one register has been pushed:
	 *
	 *	+16	count
	 *	+12	addr
	 *	 +8	port
	 *	 +4	return address
	 *	 +0	saved %edi	<-- %esp
	 *
	 * The offsets below must be adjusted if anything else is pushed.
	 */
	.set	IO_PORT, 8
	.set	IO_ADDR, 12
	.set	IO_COUNT, 16

	ENTRY(repinsb)
	pushl	%edi			/* %edi is callee-saved */
	movl	IO_PORT(%esp), %edx	/* %edx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %edi	/* %edi = addr (INS destination) */
	rep
	  insb
	popl	%edi
	ret
	SET_SIZE(repinsb)

#endif	/* __i386 */
#endif	/* __lint */
1652
1653
1654/*
1655 * Input a stream of 32-bit words.
1656 * NOTE: count is a DWORD count.
1657 */
#if defined(__lint)

/* ARGSUSED */
void
repinsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repinsd(int port, uint32_t *addr, int count)
 *
 * Read count 32-bit words from I/O port `port' and store them
 * consecutively starting at addr.
 */

#if defined(__amd64)

	ENTRY(repinsd)
	movl	%edx, %ecx		/* %ecx = count */
	movw	%di, %dx		/* %dx = port (before %rdi changes) */
	movq	%rsi, %rdi		/* %rdi = addr (INS destination) */
	rep
	  insl
	ret
	SET_SIZE(repinsd)

#elif defined(__i386)

	/* IO_PORT, IO_ADDR and IO_COUNT are the offsets after one push. */
	ENTRY(repinsd)
	pushl	%edi			/* %edi is callee-saved */
	movl	IO_PORT(%esp), %edx	/* %edx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %edi	/* %edi = addr (INS destination) */
	rep
	  insl
	popl	%edi
	ret
	SET_SIZE(repinsd)

#endif	/* __i386 */
#endif	/* __lint */
1693
1694/*
1695 * Output a stream of bytes
1696 * NOTE: count is a byte count
1697 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repoutsb(int port, uint8_t *addr, int count)
 *
 * Write count bytes, read consecutively starting at addr, to I/O port
 * `port'.
 */

#if defined(__amd64)

	ENTRY(repoutsb)
	movl	%edx, %ecx		/* %ecx = count; must precede the */
	movw	%di, %dx		/* write of port into %dx */
					/* addr is already in %rsi */
	rep
	  outsb
	ret
	SET_SIZE(repoutsb)

#elif defined(__i386)

	/* IO_PORT, IO_ADDR and IO_COUNT are the offsets after one push. */
	ENTRY(repoutsb)
	pushl	%esi			/* %esi is callee-saved */
	movl	IO_PORT(%esp), %edx	/* %edx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %esi	/* %esi = addr (OUTS source) */
	rep
	  outsb
	popl	%esi
	ret
	SET_SIZE(repoutsb)

#endif	/* __i386 */
#endif	/* __lint */
1732
1733/*
1734 * Output a stream of 32-bit words
1735 * NOTE: count is a DWORD count
1736 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repoutsd(int port, uint32_t *addr, int count)
 *
 * Write count 32-bit words, read consecutively starting at addr, to I/O
 * port `port'.
 */

#if defined(__amd64)

	ENTRY(repoutsd)
	movl	%edx, %ecx		/* %ecx = count; must precede the */
	movw	%di, %dx		/* write of port into %dx */
					/* addr is already in %rsi */
	rep
	  outsl
	ret
	SET_SIZE(repoutsd)

#elif defined(__i386)

	/* IO_PORT, IO_ADDR and IO_COUNT are the offsets after one push. */
	ENTRY(repoutsd)
	pushl	%esi			/* %esi is callee-saved */
	movl	IO_PORT(%esp), %edx	/* %edx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %esi	/* %esi = addr (OUTS source) */
	rep
	  outsl
	popl	%esi
	ret
	SET_SIZE(repoutsd)

#endif	/* __i386 */
#endif	/* __lint */
1771
1772/*
1773 * void int3(void)
1774 * void int18(void)
1775 * void int20(void)
1776 */
1777
#if defined(__lint)

void
int3(void)
{}

void
int18(void)
{}

void
int20(void)
{}

#else	/* __lint */

	/* Raise software interrupt T_BPTFLT. */
	ENTRY(int3)
	int	$T_BPTFLT
	ret
	SET_SIZE(int3)

	/* Raise software interrupt T_MCE. */
	ENTRY(int18)
	int	$T_MCE
	ret
	SET_SIZE(int18)

	/*
	 * Raise software interrupt T_DBGENTR, but only when the RB_DEBUG
	 * bit is set in boothowto; otherwise return immediately.
	 */
	ENTRY(int20)
	movl	boothowto, %eax
	andl	$RB_DEBUG, %eax		/* ZF set when RB_DEBUG is clear */
	jz	.int20_done

	int	$T_DBGENTR
.int20_done:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(int20)

#endif	/* __lint */
1816
#if defined(__lint)

/* ARGSUSED */
int
scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
{ return (0); }

#else	/* __lint */

/*
 * int scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
 *
 * Walk the size bytes at cp until a byte c is found for which
 * (table[c] & mask) != 0, or the end of the buffer is reached.
 * Returns the number of bytes from the stopping position to the end of
 * the buffer (0 when no byte matched).
 */

#if defined(__amd64)

	ENTRY(scanc)
					/* rdi == size */
					/* rsi == cp */
					/* rdx == table */
					/* rcx == mask */
	leaq	(%rsi, %rdi), %rdi	/* %rdi = end = cp + size */
	jmp	.scanc_check
.scanc_next:
	movzbl	(%rsi), %r8d		/* %r8 = *cp */
	testb	%cl, (%rdx, %r8)	/* table[*cp] & mask */
	jnz	.scanc_done		/* match: stop with cp on this byte */
	incq	%rsi			/* cp++ */
.scanc_check:
	cmpq	%rdi, %rsi
	jb	.scanc_next		/* continue while cp < end */
.scanc_done:
	movl	%edi, %eax
	subl	%esi, %eax		/* return (end - cp) */
	ret
	SET_SIZE(scanc)

#elif defined(__i386)

	ENTRY(scanc)
	pushl	%edi
	pushl	%esi
	/* after the two pushes: size +12, cp +16, table +20, mask +24 */
	movl	16(%esp), %esi		/* %esi = cp */
	movl	12(%esp), %edi
	addl	%esi, %edi		/* %edi = end = cp + size */
	movl	20(%esp), %edx		/* %edx = table */
	movb	24(%esp), %cl		/* %cl = mask */
	jmp	.scanc_check
.scanc_next:
	movzbl	(%esi), %eax		/* %eax = *cp */
	testb	%cl, (%edx, %eax)	/* table[*cp] & mask */
	jnz	.scanc_done		/* match: stop with cp on this byte */
	incl	%esi			/* cp++ */
.scanc_check:
	cmpl	%edi, %esi
	jb	.scanc_next		/* continue while cp < end */
.scanc_done:
	movl	%edi, %eax
	subl	%esi, %eax		/* return (end - cp) */
	popl	%esi
	popl	%edi
	ret
	SET_SIZE(scanc)

#endif	/* __i386 */
#endif	/* __lint */
1877
1878/*
1879 * Replacement functions for ones that are normally inlined.
1880 * In addition to the copy in i86.il, they are defined here just in case.
1881 */
1882
1883#if defined(__lint)
1884
1885ulong_t
1886intr_clear(void)
1887{ return (0); }
1888
1889ulong_t
1890clear_int_flag(void)
1891{ return (0); }
1892
1893#else	/* __lint */
1894
/*
 * ulong_t intr_clear(void)
 * ulong_t clear_int_flag(void)
 *
 * Two entry points onto the same code.  The flags register is captured
 * into the return register on entry and interrupts are then disabled
 * through the CLI() macro.
 *
 * Under __xpv, when xpv_panicking is zero, CLIRET() is used instead and
 * the PS_IE bit of the returned flags is synthesized from the event mask
 * it hands back: PS_IE is cleared if bit 0 of the mask is set, and set
 * otherwise.  When xpv_panicking is non-zero the plain CLI() path at
 * label 2 is taken.
 */
1895#if defined(__amd64)
1896
1897	ENTRY(intr_clear)
1898	ENTRY(clear_int_flag)
1899	pushfq
1900	popq	%rax
1901#if defined(__xpv)
1902	leaq	xpv_panicking, %rdi
1903	movl	(%rdi), %edi
1904	cmpl	$0, %edi
1905	jne	2f
1906	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
1907	/*
1908	 * Synthesize the PS_IE bit from the event mask bit
1909	 */
1910	andq    $_BITNOT(PS_IE), %rax
1911	testb	$1, %dl
1912	jnz	1f
1913	orq	$PS_IE, %rax
19141:
1915	ret
19162:
1917#endif
1918	CLI(%rdi)
1919	ret
1920	SET_SIZE(clear_int_flag)
1921	SET_SIZE(intr_clear)
1922
1923#elif defined(__i386)
1924
1925	ENTRY(intr_clear)
1926	ENTRY(clear_int_flag)
1927	pushfl
1928	popl	%eax
1929#if defined(__xpv)
1930	leal	xpv_panicking, %edx
1931	movl	(%edx), %edx
1932	cmpl	$0, %edx
1933	jne	2f
1934	CLIRET(%edx, %cl)	/* returns event mask in %cl */
1935	/*
1936	 * Synthesize the PS_IE bit from the event mask bit
1937	 */
1938	andl    $_BITNOT(PS_IE), %eax
1939	testb	$1, %cl
1940	jnz	1f
1941	orl	$PS_IE, %eax
19421:
1943	ret
19442:
1945#endif
1946	CLI(%edx)
1947	ret
1948	SET_SIZE(clear_int_flag)
1949	SET_SIZE(intr_clear)
1950
1951#endif	/* __i386 */
1952#endif	/* __lint */
1953
1954#if defined(__lint)
1955
1956struct cpu *
1957curcpup(void)
1958{ return 0; }
1959
1960#else	/* __lint */
1961
/*
 * struct cpu *curcpup(void)
 *
 * Return the pointer stored at offset CPU_SELF in the %gs segment.
 */
1962#if defined(__amd64)
1963
1964	ENTRY(curcpup)
1965	movq	%gs:CPU_SELF, %rax
1966	ret
1967	SET_SIZE(curcpup)
1968
1969#elif defined(__i386)
1970
1971	ENTRY(curcpup)
1972	movl	%gs:CPU_SELF, %eax
1973	ret
1974	SET_SIZE(curcpup)
1975
1976#endif	/* __i386 */
1977#endif	/* __lint */
1978
#if defined(__lint)

/* ARGSUSED */
uint32_t
htonl(uint32_t i)
{ return (0); }

/* ARGSUSED */
uint32_t
ntohl(uint32_t i)
{ return (0); }

#else	/* __lint */

/*
 * uint32_t htonl(uint32_t i)
 * uint32_t ntohl(uint32_t i)
 *
 * Both names refer to the same code: return i with its four bytes in
 * reverse order.
 */

#if defined(__amd64)

	ENTRY(htonl)
	ALTENTRY(ntohl)
	bswap	%edi			/* reverse the bytes in place */
	movl	%edi, %eax		/* and hand back the result */
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

#elif defined(__i386)

	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	4(%esp), %eax		/* %eax = i */
	bswap	%eax			/* reverse the byte order */
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

#endif	/* __i386 */
#endif	/* __lint */
2016
#if defined(__lint)

/* ARGSUSED */
uint16_t
htons(uint16_t i)
{ return (0); }

/* ARGSUSED */
uint16_t
ntohs(uint16_t i)
{ return (0); }


#else	/* __lint */

/*
 * uint16_t htons(uint16_t i)
 * uint16_t ntohs(uint16_t i)
 *
 * Both names refer to the same code: return the low 16 bits of i with
 * the two bytes exchanged, zero-extended to 32 bits.
 */

#if defined(__amd64)

	ENTRY(htons)
	ALTENTRY(ntohs)
	movzwl	%di, %eax		/* %eax = low 16 bits of i */
	rolw	$8, %ax			/* exchange the two bytes */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#elif defined(__i386)

	ENTRY(htons)
	ALTENTRY(ntohs)
	movzwl	4(%esp), %eax		/* %eax = low 16 bits of i */
	rolw	$8, %ax			/* exchange the two bytes */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#endif	/* __i386 */
#endif	/* __lint */
2057
2058
2059#if defined(__lint)
2060
2061/* ARGSUSED */
2062void
2063intr_restore(ulong_t i)
2064{ return; }
2065
2066/* ARGSUSED */
2067void
2068restore_int_flag(ulong_t i)
2069{ return; }
2070
2071#else	/* __lint */
2072
/*
 * void intr_restore(ulong_t i)
 * void restore_int_flag(ulong_t i)
 *
 * Two entry points onto the same code.  The argument is pushed and
 * popped into the flags register.  Under __xpv, when xpv_panicking is
 * zero, IE_TO_EVENT_MASK() is additionally invoked with a temporary
 * register and the register still holding the argument; when
 * xpv_panicking is non-zero that step is skipped.
 */
2073#if defined(__amd64)
2074
2075	ENTRY(intr_restore)
2076	ENTRY(restore_int_flag)
2077	pushq	%rdi
2078	popfq
2079#if defined(__xpv)
2080	leaq	xpv_panicking, %rsi
2081	movl	(%rsi), %esi
2082	cmpl	$0, %esi
2083	jne	1f
2084	/*
2085	 * Since we're -really- running unprivileged, our attempt
2086	 * to change the state of the IF bit will be ignored.
2087	 * The virtual IF bit is tweaked by CLI and STI.
2088	 */
2089	IE_TO_EVENT_MASK(%rsi, %rdi)
20901:
2091#endif
2092	ret
2093	SET_SIZE(restore_int_flag)
2094	SET_SIZE(intr_restore)
2095
2096#elif defined(__i386)
2097
2098	ENTRY(intr_restore)
2099	ENTRY(restore_int_flag)
2100	movl	4(%esp), %eax
2101	pushl	%eax
2102	popfl
2103#if defined(__xpv)
2104	leal	xpv_panicking, %edx
2105	movl	(%edx), %edx
2106	cmpl	$0, %edx
2107	jne	1f
2108	/*
2109	 * Since we're -really- running unprivileged, our attempt
2110	 * to change the state of the IF bit will be ignored.
2111	 * The virtual IF bit is tweaked by CLI and STI.
2112	 */
2113	IE_TO_EVENT_MASK(%edx, %eax)
21141:
2115#endif
2116	ret
2117	SET_SIZE(restore_int_flag)
2118	SET_SIZE(intr_restore)
2119
2120#endif	/* __i386 */
2121#endif	/* __lint */
2122
2123#if defined(__lint)
2124
2125void
2126sti(void)
2127{}
2128
2129void
2130cli(void)
2131{}
2132
2133#else	/* __lint */
2134
/*
 * void sti(void)
 * void cli(void)
 *
 * C-callable wrappers that expand the STI and CLI() macros respectively
 * and return.  CLI() is handed %rax/%eax as its register argument
 * (presumably a scratch register -- the macro is defined elsewhere).
 */
2135	ENTRY(sti)
2136	STI
2137	ret
2138	SET_SIZE(sti)
2139
2140	ENTRY(cli)
2141#if defined(__amd64)
2142	CLI(%rax)
2143#elif defined(__i386)
2144	CLI(%eax)
2145#endif	/* __i386 */
2146	ret
2147	SET_SIZE(cli)
2148
2149#endif	/* __lint */
2150
2151#if defined(__lint)
2152
2153dtrace_icookie_t
2154dtrace_interrupt_disable(void)
2155{ return (0); }
2156
2157#else   /* __lint */
2158
/*
 * dtrace_icookie_t dtrace_interrupt_disable(void)
 *
 * Capture the flags register into the return register, then disable
 * interrupts via CLI().
 *
 * Under __xpv the CLI() path is compiled out.  When xpv_panicking is
 * zero, CLIRET() is invoked and the PS_IE bit of the returned cookie is
 * synthesized from the event mask it returns (cleared if bit 0 of the
 * mask is set, set otherwise).  When xpv_panicking is non-zero the code
 * branches straight to the return, so the captured flags are returned
 * without either macro being invoked.
 */
2159#if defined(__amd64)
2160
2161	ENTRY(dtrace_interrupt_disable)
2162	pushfq
2163	popq	%rax
2164#if defined(__xpv)
2165	leaq	xpv_panicking, %rdi
2166	movl	(%rdi), %edi
2167	cmpl	$0, %edi
2168	jne	1f
2169	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
2170	/*
2171	 * Synthesize the PS_IE bit from the event mask bit
2172	 */
2173	andq    $_BITNOT(PS_IE), %rax
2174	testb	$1, %dl
2175	jnz	1f
2176	orq	$PS_IE, %rax
21771:
2178#else
2179	CLI(%rdx)
2180#endif
2181	ret
2182	SET_SIZE(dtrace_interrupt_disable)
2183
2184#elif defined(__i386)
2185
2186	ENTRY(dtrace_interrupt_disable)
2187	pushfl
2188	popl	%eax
2189#if defined(__xpv)
2190	leal	xpv_panicking, %edx
2191	movl	(%edx), %edx
2192	cmpl	$0, %edx
2193	jne	1f
2194	CLIRET(%edx, %cl)	/* returns event mask in %cl */
2195	/*
2196	 * Synthesize the PS_IE bit from the event mask bit
2197	 */
2198	andl    $_BITNOT(PS_IE), %eax
2199	testb	$1, %cl
2200	jnz	1f
2201	orl	$PS_IE, %eax
22021:
2203#else
2204	CLI(%edx)
2205#endif
2206	ret
2207	SET_SIZE(dtrace_interrupt_disable)
2208
2209#endif	/* __i386 */
2210#endif	/* __lint */
2211
#if defined(__lint)

/*ARGSUSED*/
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
{}

#else	/* __lint */

/*
 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
 *
 * Load the flags register from cookie.  Under __xpv, when xpv_panicking
 * is zero, IE_TO_EVENT_MASK() is additionally invoked with a temporary
 * register and the register still holding the cookie; when xpv_panicking
 * is non-zero that step is skipped.
 *
 * The skip branch targets a named local label placed on the final ret.
 * A numeric forward reference ("1f") must not be used here: this
 * function defines no "1:" of its own, so such a reference would bind to
 * whichever "1:" happens to follow in the assembled file.
 */

#if defined(__amd64)

	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi
	popfq				/* flags = cookie */
#if defined(__xpv)
	leaq	xpv_panicking, %rdx
	movl	(%rdx), %edx
	cmpl	$0, %edx
	jne	.dtrace_interrupt_enable_done
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored. The
	 * virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%rdx, %rdi)
#endif
.dtrace_interrupt_enable_done:
	ret
	SET_SIZE(dtrace_interrupt_enable)

#elif defined(__i386)

	ENTRY(dtrace_interrupt_enable)
	movl	4(%esp), %eax		/* %eax = cookie */
	pushl	%eax
	popfl				/* flags = cookie */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	cmpl	$0, %edx
	jne	.dtrace_interrupt_enable_done
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored. The
	 * virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%edx, %eax)
#endif
.dtrace_interrupt_enable_done:
	ret
	SET_SIZE(dtrace_interrupt_enable)

#endif	/* __i386 */
#endif	/* __lint */
2264
2265
#if defined(__lint)

void
dtrace_membar_producer(void)
{}

void
dtrace_membar_consumer(void)
{}

#else	/* __lint */

	/*
	 * void dtrace_membar_producer(void)
	 * void dtrace_membar_consumer(void)
	 *
	 * Both routines consist of a return instruction only; no fence
	 * instruction is issued.
	 */
	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_producer)

	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_consumer)

#endif	/* __lint */
2289
2290#if defined(__lint)
2291
2292kthread_id_t
2293threadp(void)
2294{ return ((kthread_id_t)0); }
2295
2296#else	/* __lint */
2297
/*
 * kthread_id_t threadp(void)
 *
 * Return the pointer stored at offset CPU_THREAD in the %gs segment.
 */
2298#if defined(__amd64)
2299
2300	ENTRY(threadp)
2301	movq	%gs:CPU_THREAD, %rax
2302	ret
2303	SET_SIZE(threadp)
2304
2305#elif defined(__i386)
2306
2307	ENTRY(threadp)
2308	movl	%gs:CPU_THREAD, %eax
2309	ret
2310	SET_SIZE(threadp)
2311
2312#endif	/* __i386 */
2313#endif	/* __lint */
2314
2315/*
2316 *   Checksum routine for Internet Protocol Headers
2317 */
2318
2319#if defined(__lint)
2320
2321/* ARGSUSED */
2322unsigned int
2323ip_ocsum(
2324	ushort_t *address,	/* ptr to 1st message buffer */
2325	int halfword_count,	/* length of data */
2326	unsigned int sum)	/* partial checksum */
2327{
2328	int		i;
2329	unsigned int	psum = 0;	/* partial sum */
2330
2331	for (i = 0; i < halfword_count; i++, address++) {
2332		psum += *address;
2333	}
2334
2335	while ((psum >> 16) != 0) {
2336		psum = (psum & 0xffff) + (psum >> 16);
2337	}
2338
2339	psum += sum;
2340
2341	while ((psum >> 16) != 0) {
2342		psum = (psum & 0xffff) + (psum >> 16);
2343	}
2344
2345	return (psum);
2346}
2347
2348#else	/* __lint */
2349
/*
 * unsigned int ip_ocsum(ushort_t *address, int halfword_count,
 *     unsigned int sum)
 *
 * Register use in the assembly versions:
 *	%rsi/%esi	current data pointer
 *	%ecx		halfwords still to be summed
 *	%edx		accumulator, seeded with the caller's partial sum
 *	%eax		second accumulator, starts at zero
 *
 * Outline:
 *  - A zero halfword_count goes straight to .ip_ocsum_done.
 *  - If the address has either of its low two bits set, one halfword is
 *    added on its own (.ip_csum_notaligned) before the main loop.
 *  - The main loop (.next_iter) consumes 32 halfwords (64 bytes) per
 *    pass as sixteen 32-bit add-with-carry steps that alternate between
 *    the two accumulators, then folds the final carry into %eax.
 *  - With fewer than 32 halfwords left (.less_than_32), an odd trailing
 *    halfword is added separately; the data pointer is then moved back
 *    by (64 - remaining bytes) so the tail of the unrolled chain lines
 *    up with the remaining data, the carry flag is cleared, %ecx is
 *    zeroed so the loop terminates, and control enters the chain through
 *    .ip_ocsum_jmptbl indexed by the number of remaining 32-bit words.
 *  - .ip_ocsum_done adds the two accumulators with end-around carry and
 *    folds the 32-bit result into 16 bits.
 *
 * The amd64 DEBUG build panics if address is below postbootkernelbase.
 */
2350#if defined(__amd64)
2351
2352	ENTRY(ip_ocsum)
2353	pushq	%rbp
2354	movq	%rsp, %rbp
2355#ifdef DEBUG
2356	movq	postbootkernelbase(%rip), %rax
2357	cmpq	%rax, %rdi
2358	jnb	1f
2359	xorl	%eax, %eax
2360	movq	%rdi, %rsi
2361	leaq	.ip_ocsum_panic_msg(%rip), %rdi
2362	call	panic
2363	/*NOTREACHED*/
2364.ip_ocsum_panic_msg:
2365	.string	"ip_ocsum: address 0x%p below kernelbase\n"
23661:
2367#endif
2368	movl	%esi, %ecx	/* halfword_count */
2369	movq	%rdi, %rsi	/* address */
2370				/* partial sum in %edx */
2371	xorl	%eax, %eax
2372	testl	%ecx, %ecx
2373	jz	.ip_ocsum_done
2374	testq	$3, %rsi
2375	jnz	.ip_csum_notaligned
2376.ip_csum_aligned:	/* XX64 opportunities for 8-byte operations? */
2377.next_iter:
2378	/* XX64 opportunities for prefetch? */
2379	/* XX64 compute csum with 64 bit quantities? */
2380	subl	$32, %ecx
2381	jl	.less_than_32
2382
2383	addl	0(%rsi), %edx
2384.only60:
2385	adcl	4(%rsi), %eax
2386.only56:
2387	adcl	8(%rsi), %edx
2388.only52:
2389	adcl	12(%rsi), %eax
2390.only48:
2391	adcl	16(%rsi), %edx
2392.only44:
2393	adcl	20(%rsi), %eax
2394.only40:
2395	adcl	24(%rsi), %edx
2396.only36:
2397	adcl	28(%rsi), %eax
2398.only32:
2399	adcl	32(%rsi), %edx
2400.only28:
2401	adcl	36(%rsi), %eax
2402.only24:
2403	adcl	40(%rsi), %edx
2404.only20:
2405	adcl	44(%rsi), %eax
2406.only16:
2407	adcl	48(%rsi), %edx
2408.only12:
2409	adcl	52(%rsi), %eax
2410.only8:
2411	adcl	56(%rsi), %edx
2412.only4:
2413	adcl	60(%rsi), %eax	/* could be adding -1 and -1 with a carry */
2414.only0:
2415	adcl	$0, %eax	/* could be adding -1 in eax with a carry */
2416	adcl	$0, %eax
2417
2418	addq	$64, %rsi
2419	testl	%ecx, %ecx
2420	jnz	.next_iter
2421
2422.ip_ocsum_done:
2423	addl	%eax, %edx
2424	adcl	$0, %edx
2425	movl	%edx, %eax	/* form a 16 bit checksum by */
2426	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
2427	addw	%dx, %ax
2428	adcw	$0, %ax
2429	andl	$0xffff, %eax
2430	leave
2431	ret
2432
2433.ip_csum_notaligned:
2434	xorl	%edi, %edi
2435	movw	(%rsi), %di
2436	addl	%edi, %edx
2437	adcl	$0, %edx
2438	addq	$2, %rsi
2439	decl	%ecx
2440	jmp	.ip_csum_aligned
2441
2442.less_than_32:
2443	addl	$32, %ecx
2444	testl	$1, %ecx
2445	jz	.size_aligned
2446	andl	$0xfe, %ecx
2447	movzwl	(%rsi, %rcx, 2), %edi
2448	addl	%edi, %edx
2449	adcl	$0, %edx
2450.size_aligned:
2451	movl	%ecx, %edi
2452	shrl	$1, %ecx
2453	shl	$1, %edi
2454	subq	$64, %rdi
2455	addq	%rdi, %rsi
2456	leaq    .ip_ocsum_jmptbl(%rip), %rdi
2457	leaq	(%rdi, %rcx, 8), %rdi
2458	xorl	%ecx, %ecx
2459	clc
2460	jmp 	*(%rdi)
2461
2462	.align	8
2463.ip_ocsum_jmptbl:
2464	.quad	.only0, .only4, .only8, .only12, .only16, .only20
2465	.quad	.only24, .only28, .only32, .only36, .only40, .only44
2466	.quad	.only48, .only52, .only56, .only60
2467	SET_SIZE(ip_ocsum)
2468
2469#elif defined(__i386)
2470
2471	ENTRY(ip_ocsum)
2472	pushl	%ebp
2473	movl	%esp, %ebp
2474	pushl	%ebx
2475	pushl	%esi
2476	pushl	%edi
2477	movl	12(%ebp), %ecx	/* count of half words */
2478	movl	16(%ebp), %edx	/* partial checksum */
2479	movl	8(%ebp), %esi
2480	xorl	%eax, %eax
2481	testl	%ecx, %ecx
2482	jz	.ip_ocsum_done
2483
2484	testl	$3, %esi
2485	jnz	.ip_csum_notaligned
2486.ip_csum_aligned:
2487.next_iter:
2488	subl	$32, %ecx
2489	jl	.less_than_32
2490
2491	addl	0(%esi), %edx
2492.only60:
2493	adcl	4(%esi), %eax
2494.only56:
2495	adcl	8(%esi), %edx
2496.only52:
2497	adcl	12(%esi), %eax
2498.only48:
2499	adcl	16(%esi), %edx
2500.only44:
2501	adcl	20(%esi), %eax
2502.only40:
2503	adcl	24(%esi), %edx
2504.only36:
2505	adcl	28(%esi), %eax
2506.only32:
2507	adcl	32(%esi), %edx
2508.only28:
2509	adcl	36(%esi), %eax
2510.only24:
2511	adcl	40(%esi), %edx
2512.only20:
2513	adcl	44(%esi), %eax
2514.only16:
2515	adcl	48(%esi), %edx
2516.only12:
2517	adcl	52(%esi), %eax
2518.only8:
2519	adcl	56(%esi), %edx
2520.only4:
2521	adcl	60(%esi), %eax	/* We could be adding -1 and -1 with a carry */
2522.only0:
2523	adcl	$0, %eax	/* we could be adding -1 in eax with a carry */
2524	adcl	$0, %eax
2525
2526	addl	$64, %esi
2527	andl	%ecx, %ecx
2528	jnz	.next_iter
2529
2530.ip_ocsum_done:
2531	addl	%eax, %edx
2532	adcl	$0, %edx
2533	movl	%edx, %eax	/* form a 16 bit checksum by */
2534	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
2535	addw	%dx, %ax
2536	adcw	$0, %ax
2537	andl	$0xffff, %eax
2538	popl	%edi		/* restore registers */
2539	popl	%esi
2540	popl	%ebx
2541	leave
2542	ret
2543
2544.ip_csum_notaligned:
2545	xorl	%edi, %edi
2546	movw	(%esi), %di
2547	addl	%edi, %edx
2548	adcl	$0, %edx
2549	addl	$2, %esi
2550	decl	%ecx
2551	jmp	.ip_csum_aligned
2552
2553.less_than_32:
2554	addl	$32, %ecx
2555	testl	$1, %ecx
2556	jz	.size_aligned
2557	andl	$0xfe, %ecx
2558	movzwl	(%esi, %ecx, 2), %edi
2559	addl	%edi, %edx
2560	adcl	$0, %edx
2561.size_aligned:
2562	movl	%ecx, %edi
2563	shrl	$1, %ecx
2564	shl	$1, %edi
2565	subl	$64, %edi
2566	addl	%edi, %esi
2567	movl	$.ip_ocsum_jmptbl, %edi
2568	lea	(%edi, %ecx, 4), %edi
2569	xorl	%ecx, %ecx
2570	clc
2571	jmp 	*(%edi)
2572	SET_SIZE(ip_ocsum)
2573
2574	.data
2575	.align	4
2576
2577.ip_ocsum_jmptbl:
2578	.long	.only0, .only4, .only8, .only12, .only16, .only20
2579	.long	.only24, .only28, .only32, .only36, .only40, .only44
2580	.long	.only48, .only52, .only56, .only60
2581
2582
2583#endif	/* __i386 */
2584#endif	/* __lint */
2585
2586/*
2587 * multiply two long numbers and yield a u_longlong_t result, callable from C.
2588 * Provided to manipulate hrtime_t values.
2589 */
#if defined(__lint)

/* result = a * b; */

/* ARGSUSED */
unsigned long long
mul32(uint_t a, uint_t b)
{ return (0); }

#else	/* __lint */

/*
 * unsigned long long mul32(uint_t a, uint_t b)
 *
 * Unsigned 32 x 32 -> 64 bit multiply.
 */

#if defined(__amd64)

	ENTRY(mul32)
	movl	%esi, %eax		/* %eax = b */
	mull	%edi			/* %edx:%eax = a * b */
	shlq	$32, %rdx		/* move the high half into place */
	orq	%rdx, %rax		/* %rax = full 64-bit product */
	ret
	SET_SIZE(mul32)

#elif defined(__i386)

	ENTRY(mul32)
	movl	4(%esp), %eax		/* %eax = a */
	mull	8(%esp)			/* %edx:%eax = a * b */
	ret
	SET_SIZE(mul32)

#endif	/* __i386 */
#endif	/* __lint */
2623
2624#if defined(notused)
2625#if defined(__lint)
2626/* ARGSUSED */
2627void
2628load_pte64(uint64_t *pte, uint64_t pte_value)
2629{}
2630#else	/* __lint */
/*
 * void load_pte64(uint64_t *pte, uint64_t pte_value)
 *
 * Only assembled when `notused' is defined.  Takes its arguments from
 * the stack and stores the 64-bit pte_value to *pte as two 32-bit
 * writes: the upper half (at pte + 4) first, then the lower half.
 */
2631	.globl load_pte64
2632load_pte64:
2633	movl	4(%esp), %eax
2634	movl	8(%esp), %ecx
2635	movl	12(%esp), %edx
2636	movl	%edx, 4(%eax)
2637	movl	%ecx, (%eax)
2638	ret
2639#endif	/* __lint */
2640#endif	/* notused */
2641
#if defined(__lint)

/*ARGSUSED*/
void
scan_memory(caddr_t addr, size_t size)
{}

#else	/* __lint */

/*
 * void scan_memory(caddr_t addr, size_t size)
 *
 * Read through the size bytes starting at addr, one native word at a
 * time, with a repeated string load.  The data read is discarded.  A
 * size smaller than one word results in no reads at all.
 */

#if defined(__amd64)

	ENTRY(scan_memory)
	movq	%rsi, %rcx
	shrq	$3, %rcx	/* %rcx = size / 8 = quadwords to read */
	jz	.scanm_done
	movq	%rdi, %rsi	/* lodsq reads through %rsi */
	rep lodsq		/* scan the memory range */
.scanm_done:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(scan_memory)

#elif defined(__i386)

	ENTRY(scan_memory)
	pushl	%ecx
	pushl	%esi
	/* after the two pushes: addr is at +12, size at +16 */
	movl	12(%esp), %esi	/* lodsl reads through %esi */
	movl	16(%esp), %ecx
	shrl	$2, %ecx	/* %ecx = size / 4 = longwords to read */
	jz	.scanm_done
	.byte	0xf3		/* rep prefix, emitted by hand */
	lodsl
.scanm_done:
	popl	%esi
	popl	%ecx
	ret
	SET_SIZE(scan_memory)

#endif	/* __i386 */
#endif	/* __lint */
2683
2684
#if defined(__lint)

/*ARGSUSED */
int
lowbit(ulong_t i)
{ return (0); }

#else	/* __lint */

/*
 * int lowbit(ulong_t i)
 *
 * Return the 1-based position of the least significant set bit of i,
 * or 0 when i is zero.
 *
 * BSF sets ZF when its source is zero, and in that case the contents of
 * its destination are architecturally undefined.  The zero case is
 * therefore selected on ZF rather than by relying on a preloaded
 * destination surviving the instruction.
 */

#if defined(__amd64)

	ENTRY(lowbit)
	movl	$-1, %eax		/* yields 0 after the increment */
	bsfq	%rdi, %rdi		/* %rdi = bit index; ZF=1 if i == 0 */
	cmovnz	%edi, %eax		/* use the index only when valid */
	incl	%eax			/* 0-based index -> 1-based result */
	ret
	SET_SIZE(lowbit)

#elif defined(__i386)

	ENTRY(lowbit)
	bsfl	4(%esp), %eax		/* %eax = bit index; ZF=1 if i == 0 */
	jz	.lowbit_zero
	incl	%eax			/* 0-based index -> 1-based result */
	ret
.lowbit_zero:
	xorl	%eax, %eax		/* no bit set: return 0 */
	ret
	SET_SIZE(lowbit)

#endif	/* __i386 */
#endif	/* __lint */
2714
#if defined(__lint)

/*ARGSUSED*/
int
highbit(ulong_t i)
{ return (0); }

#else	/* __lint */

/*
 * int highbit(ulong_t i)
 *
 * Return the 1-based position of the most significant set bit of i,
 * or 0 when i is zero.
 *
 * BSR sets ZF when its source is zero, and in that case the contents of
 * its destination are architecturally undefined.  The zero case is
 * therefore selected on ZF rather than by relying on a preloaded
 * destination surviving the instruction.
 */

#if defined(__amd64)

	ENTRY(highbit)
	movl	$-1, %eax		/* yields 0 after the increment */
	bsrq	%rdi, %rdi		/* %rdi = bit index; ZF=1 if i == 0 */
	cmovnz	%edi, %eax		/* use the index only when valid */
	incl	%eax			/* 0-based index -> 1-based result */
	ret
	SET_SIZE(highbit)

#elif defined(__i386)

	ENTRY(highbit)
	bsrl	4(%esp), %eax		/* %eax = bit index; ZF=1 if i == 0 */
	jz	.highbit_zero
	incl	%eax			/* 0-based index -> 1-based result */
	ret
.highbit_zero:
	xorl	%eax, %eax		/* no bit set: return 0 */
	ret
	SET_SIZE(highbit)

#endif	/* __i386 */
#endif	/* __lint */
2744
#if defined(__lint)

/*ARGSUSED*/
uint64_t
rdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
wrmsr(uint_t r, const uint64_t val)
{}

/*ARGSUSED*/
uint64_t
xrdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
xwrmsr(uint_t r, const uint64_t val)
{}

void
invalidate_cache(void)
{}

#else  /* __lint */

/*
 * uint64_t rdmsr(uint_t r)
 * void wrmsr(uint_t r, const uint64_t val)
 * uint64_t xrdmsr(uint_t r)
 * void xwrmsr(uint_t r, const uint64_t val)
 * void invalidate_cache(void)
 *
 * rdmsr/wrmsr read and write model-specific register r; the 64-bit
 * value travels in %edx:%eax around the instruction.  The x- variants
 * do the same with XMSR_ACCESS_VAL loaded into %edi first.
 * invalidate_cache executes wbinvd.
 */

#define	XMSR_ACCESS_VAL		$0x9c5a203a

#if defined(__amd64)

	ENTRY(rdmsr)
	movl	%edi, %ecx		/* %ecx = r */
	rdmsr				/* %edx:%eax = MSR contents */
	shlq	$32, %rdx
	orq	%rdx, %rax		/* %rax = 64-bit result */
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	%edi, %ecx		/* %ecx = r */
	movl	%esi, %eax		/* %eax = low half of val */
	shrq	$32, %rsi
	movl	%esi, %edx		/* %edx = high half of val */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	ENTRY(xrdmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* %ecx = r; taken before %edi is reused */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr				/* %edx:%eax = MSR contents */
	shlq	$32, %rdx
	orq	%rdx, %rax		/* %rax = 64-bit result */
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* %ecx = r; taken before %edi is reused */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	movl	%esi, %eax		/* %eax = low half of val */
	shrq	$32, %rsi
	movl	%esi, %edx		/* %edx = high half of val */
	wrmsr
	leave
	ret
	SET_SIZE(xwrmsr)

#elif defined(__i386)

	ENTRY(rdmsr)
	movl	4(%esp), %ecx		/* %ecx = r */
	rdmsr				/* result returned in %edx:%eax */
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	12(%esp), %edx		/* %edx = high half of val */
	movl	8(%esp), %eax		/* %eax = low half of val */
	movl	4(%esp), %ecx		/* %ecx = r */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	ENTRY(xrdmsr)
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%ebp), %ecx		/* %ecx = r */
	pushl	%edi			/* %edi is callee-saved */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr				/* result returned in %edx:%eax */
	popl	%edi
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushl	%ebp
	movl	%esp, %ebp
	movl	16(%ebp), %edx		/* %edx = high half of val */
	movl	12(%ebp), %eax		/* %eax = low half of val */
	movl	8(%ebp), %ecx		/* %ecx = r */
	pushl	%edi			/* %edi is callee-saved */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	wrmsr
	popl	%edi
	leave
	ret
	SET_SIZE(xwrmsr)

#endif	/* __i386 */

	ENTRY(invalidate_cache)
	wbinvd
	ret
	SET_SIZE(invalidate_cache)

#endif	/* __lint */
2869
2870#if defined(__lint)
2871
2872/*ARGSUSED*/
2873void
2874getcregs(struct cregs *crp)
2875{}
2876
2877#else	/* __lint */
2878
/*
 * void getcregs(struct cregs *crp)
 *
 * Fill *crp, at the CREG_* offsets, with a snapshot of processor state.
 *
 * Non-__xpv builds store the GDT and IDT registers, the LDT selector,
 * the task register and %cr0, %cr2, %cr3 and %cr4.  The amd64 version
 * additionally stores %cr8 and the MSR_AMD_KGSBASE and MSR_AMD_EFER
 * MSRs (through the local GETMSR macro), and zeroes each descriptor
 * field before the shorter store into it.  The i386 version reads %cr4
 * only when X86_LARGEPAGE is set in x86_feature and stores 0 otherwise.
 *
 * __xpv builds bzero() CREGSZ bytes at crp and then store only %cr0,
 * %cr2, %cr3 and %cr4.
 */
2879#if defined(__amd64)
2880
2881	ENTRY_NP(getcregs)
2882#if defined(__xpv)
2883	/*
2884	 * Only a few of the hardware control registers or descriptor tables
2885	 * are directly accessible to us, so just zero the structure.
2886	 *
2887	 * XXPV	Perhaps it would be helpful for the hypervisor to return
2888	 *	virtualized versions of these for post-mortem use.
2889	 *	(Need to reevaluate - perhaps it already does!)
2890	 */
2891	pushq	%rdi		/* save *crp */
2892	movq	$CREGSZ, %rsi
2893	call	bzero
2894	popq	%rdi
2895
2896	/*
2897	 * Dump what limited information we can
2898	 */
2899	movq	%cr0, %rax
2900	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
2901	movq	%cr2, %rax
2902	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
2903	movq	%cr3, %rax
2904	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
2905	movq	%cr4, %rax
2906	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
2907
2908#else	/* __xpv */
2909
/* GETMSR(r, off, d): read MSR r and store its 64-bit value at off(d). */
2910#define	GETMSR(r, off, d)	\
2911	movl	$r, %ecx;	\
2912	rdmsr;			\
2913	movl	%eax, off(d);	\
2914	movl	%edx, off+4(d)
2915
2916	xorl	%eax, %eax
2917	movq	%rax, CREG_GDT+8(%rdi)
2918	sgdt	CREG_GDT(%rdi)		/* 10 bytes */
2919	movq	%rax, CREG_IDT+8(%rdi)
2920	sidt	CREG_IDT(%rdi)		/* 10 bytes */
2921	movq	%rax, CREG_LDT(%rdi)
2922	sldt	CREG_LDT(%rdi)		/* 2 bytes */
2923	movq	%rax, CREG_TASKR(%rdi)
2924	str	CREG_TASKR(%rdi)	/* 2 bytes */
2925	movq	%cr0, %rax
2926	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
2927	movq	%cr2, %rax
2928	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
2929	movq	%cr3, %rax
2930	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
2931	movq	%cr4, %rax
2932	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
2933	movq	%cr8, %rax
2934	movq	%rax, CREG_CR8(%rdi)	/* cr8 */
2935	GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
2936	GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
2937#endif	/* __xpv */
2938	ret
2939	SET_SIZE(getcregs)
2940
2941#undef GETMSR
2942
2943#elif defined(__i386)
2944
2945	ENTRY_NP(getcregs)
2946#if defined(__xpv)
2947	/*
2948	 * Only a few of the hardware control registers or descriptor tables
2949	 * are directly accessible to us, so just zero the structure.
2950	 *
2951	 * XXPV	Perhaps it would be helpful for the hypervisor to return
2952	 *	virtualized versions of these for post-mortem use.
2953	 *	(Need to reevaluate - perhaps it already does!)
2954	 */
2955	movl	4(%esp), %edx
2956	pushl	$CREGSZ
2957	pushl	%edx
2958	call	bzero
2959	addl	$8, %esp
2960	movl	4(%esp), %edx
2961
2962	/*
2963	 * Dump what limited information we can
2964	 */
2965	movl	%cr0, %eax
2966	movl	%eax, CREG_CR0(%edx)	/* cr0 */
2967	movl	%cr2, %eax
2968	movl	%eax, CREG_CR2(%edx)	/* cr2 */
2969	movl	%cr3, %eax
2970	movl	%eax, CREG_CR3(%edx)	/* cr3 */
2971	movl	%cr4, %eax
2972	movl	%eax, CREG_CR4(%edx)	/* cr4 */
2973
2974#else	/* __xpv */
2975
2976	movl	4(%esp), %edx
2977	movw	$0, CREG_GDT+6(%edx)
2978	movw	$0, CREG_IDT+6(%edx)
2979	sgdt	CREG_GDT(%edx)		/* gdt */
2980	sidt	CREG_IDT(%edx)		/* idt */
2981	sldt	CREG_LDT(%edx)		/* ldt */
2982	str	CREG_TASKR(%edx)	/* task */
2983	movl	%cr0, %eax
2984	movl	%eax, CREG_CR0(%edx)	/* cr0 */
2985	movl	%cr2, %eax
2986	movl	%eax, CREG_CR2(%edx)	/* cr2 */
2987	movl	%cr3, %eax
2988	movl	%eax, CREG_CR3(%edx)	/* cr3 */
2989	testl	$X86_LARGEPAGE, x86_feature
2990	jz	.nocr4
2991	movl	%cr4, %eax
2992	movl	%eax, CREG_CR4(%edx)	/* cr4 */
2993	jmp	.skip
2994.nocr4:
2995	movl	$0, CREG_CR4(%edx)
2996.skip:
2997#endif
2998	ret
2999	SET_SIZE(getcregs)
3000
3001#endif	/* __i386 */
3002#endif	/* __lint */
3003
3004
3005/*
3006 * A panic trigger is a word which is updated atomically and can only be set
3007 * once.  We atomically store 0xDEFACEDD and load the old value.  If the
3008 * previous value was 0, we succeed and return 1; otherwise return 0.
3009 * This allows a partially corrupt trigger to still trigger correctly.  DTrace
3010 * has its own version of this function to allow it to panic correctly from
3011 * probe context.
3012 */
3013#if defined(__lint)
3014
3015/*ARGSUSED*/
3016int
3017panic_trigger(int *tp)
3018{ return (0); }
3019
3020/*ARGSUSED*/
3021int
3022dtrace_panic_trigger(int *tp)
3023{ return (0); }
3024
3025#else	/* __lint */
3026
3027#if defined(__amd64)
3028
/*
 * int panic_trigger(int *tp) -- 64-bit variant.
 *
 * Atomically swaps 0xdefacedd into *tp (%rdi) and returns 1 if the word
 * previously held zero, 0 otherwise.
 */
	ENTRY_NP(panic_trigger)
	movl	$0xdefacedd, %edx
	lock
	  xchgl	%edx, (%rdi)		/* %edx = previous trigger value */
	xorl	%eax, %eax
	testl	%edx, %edx
	sete	%al			/* %eax = (previous value == 0) */
	ret
	SET_SIZE(panic_trigger)
3041
/*
 * int dtrace_panic_trigger(int *tp) -- 64-bit variant.
 *
 * Separate copy of panic_trigger() kept for DTrace: atomically swaps
 * 0xdefacedd into *tp (%rdi) and returns 1 if the word previously held
 * zero, 0 otherwise.
 */
	ENTRY_NP(dtrace_panic_trigger)
	movl	$0xdefacedd, %edx
	lock
	  xchgl	%edx, (%rdi)		/* %edx = previous trigger value */
	xorl	%eax, %eax
	testl	%edx, %edx
	sete	%al			/* %eax = (previous value == 0) */
	ret
	SET_SIZE(dtrace_panic_trigger)
3054
3055#elif defined(__i386)
3056
/*
 * int panic_trigger(int *tp) -- 32-bit variant.
 *
 * Atomically swaps 0xdefacedd into *tp (argument at 4(%esp)) and returns
 * 1 if the word previously held zero, 0 otherwise.
 */
	ENTRY_NP(panic_trigger)
	movl	4(%esp), %ecx		/ %ecx = address of trigger
	movl	$0xdefacedd, %edx	/ %edx = 0xdefacedd
	lock				/ assert lock
	xchgl	%edx, (%ecx)		/ %edx = previous trigger value
	xorl	%eax, %eax
	testl	%edx, %edx
	sete	%al			/ return (previous value == 0)
	ret
	SET_SIZE(panic_trigger)
3069
/*
 * int dtrace_panic_trigger(int *tp) -- 32-bit variant.
 *
 * Separate copy of panic_trigger() kept for DTrace: atomically swaps
 * 0xdefacedd into *tp (argument at 4(%esp)) and returns 1 if the word
 * previously held zero, 0 otherwise.
 */
	ENTRY_NP(dtrace_panic_trigger)
	movl	4(%esp), %ecx		/ %ecx = address of trigger
	movl	$0xdefacedd, %edx	/ %edx = 0xdefacedd
	lock				/ assert lock
	xchgl	%edx, (%ecx)		/ %edx = previous trigger value
	xorl	%eax, %eax
	testl	%edx, %edx
	sete	%al			/ return (previous value == 0)
	ret
	SET_SIZE(dtrace_panic_trigger)
3082
3083#endif	/* __i386 */
3084#endif	/* __lint */
3085
3086/*
3087 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
3088 * into the panic code implemented in panicsys().  vpanic() is responsible
3089 * for passing through the format string and arguments, and constructing a
3090 * regs structure on the stack into which it saves the current register
3091 * values.  If we are not dying due to a fatal trap, these registers will
3092 * then be preserved in panicbuf as the current processor state.  Before
3093 * invoking panicsys(), vpanic() activates the first panic trigger (see
3094 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
3095 * DTrace takes a slightly different panic path if it must panic from probe
3096 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
3097 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
3098 * branches back into vpanic().
3099 */
3100#if defined(__lint)
3101
3102/*ARGSUSED*/
3103void
3104vpanic(const char *format, va_list alist)
3105{}
3106
3107/*ARGSUSED*/
3108void
3109dtrace_vpanic(const char *format, va_list alist)
3110{}
3111
3112#else	/* __lint */
3113
3114#if defined(__amd64)
3115
/*
 * void vpanic(const char *format, va_list alist) -- 64-bit variant.
 *
 * Saves the caller-visible registers on the current stack, fires the
 * panic_quiesce trigger, optionally switches to panic_stack, builds a
 * regs structure (REGOFF_* offsets) describing the state at entry and
 * calls panicsys(format, alist, rp, on_panic_stack).
 *
 * Register roles after the pushes:
 *	%rbx	base of the 0x0..0x60 save area laid out below
 *	%r11d	result of the panic trigger (set at vpanic_common)
 *
 * vpanic_common is also entered by a jmp from dtrace_vpanic, which must
 * therefore build exactly the same save area and leave the trigger
 * result in %eax.
 */
	ENTRY_NP(vpanic)			/* Initial stack layout: */

	pushq	%rbp				/* | %rip | 	0x60	*/
	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
	pushfq					/* | rfl  |	0x50	*/
	pushq	%r11				/* | %r11 |	0x48	*/
	pushq	%r10				/* | %r10 |	0x40	*/
	pushq	%rbx				/* | %rbx |	0x38	*/
	pushq	%rax				/* | %rax |	0x30	*/
	pushq	%r9				/* | %r9  |	0x28	*/
	pushq	%r8				/* | %r8  |	0x20	*/
	pushq	%rcx				/* | %rcx |	0x18	*/
	pushq	%rdx				/* | %rdx |	0x10	*/
	pushq	%rsi				/* | %rsi |	0x8 alist */
	pushq	%rdi				/* | %rdi |	0x0 format */

	movq	%rsp, %rbx			/* %rbx = current %rsp */

	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
	call	panic_trigger			/* %eax = panic_trigger() */

vpanic_common:
	/*
	 * The trigger result is in %eax, either from the call above or
	 * from the dtrace_panic_trigger() call made by dtrace_vpanic
	 * before it branches here.  It is stashed in %r11d and later
	 * passed to panicsys() as on_panic_stack.
	 * NOTE(review): an earlier version of this comment said the stash
	 * was needed because rdmsr instructions below clobber %eax; no
	 * rdmsr is present in this routine.
	 */
	movl	%eax, %r11d
	cmpl	$0, %r11d
	je	0f

	/*
	 * If panic_trigger() was successful, we are the first to initiate a
	 * panic: we now switch to the reserved panic_stack before continuing.
	 */
	leaq	panic_stack(%rip), %rsp
	addq	$PANICSTKSIZE, %rsp
0:	subq	$REGSIZE, %rsp			/* allocate the regs structure */
	/*
	 * Now that we've got everything set up, store the register values as
	 * they were when we entered vpanic() to the designated location in
	 * the regs structure we allocated on the stack.
	 */
	movq	0x0(%rbx), %rcx
	movq	%rcx, REGOFF_RDI(%rsp)
	movq	0x8(%rbx), %rcx
	movq	%rcx, REGOFF_RSI(%rsp)
	movq	0x10(%rbx), %rcx
	movq	%rcx, REGOFF_RDX(%rsp)
	movq	0x18(%rbx), %rcx
	movq	%rcx, REGOFF_RCX(%rsp)
	movq	0x20(%rbx), %rcx

	movq	%rcx, REGOFF_R8(%rsp)
	movq	0x28(%rbx), %rcx
	movq	%rcx, REGOFF_R9(%rsp)
	movq	0x30(%rbx), %rcx
	movq	%rcx, REGOFF_RAX(%rsp)
	movq	0x38(%rbx), %rcx
	movq	%rcx, REGOFF_RBX(%rsp)
	movq	0x58(%rbx), %rcx

	movq	%rcx, REGOFF_RBP(%rsp)
	movq	0x40(%rbx), %rcx
	movq	%rcx, REGOFF_R10(%rsp)
	movq	0x48(%rbx), %rcx
	movq	%rcx, REGOFF_R11(%rsp)
	/* %r12-%r15 have not been modified since entry; store them directly */
	movq	%r12, REGOFF_R12(%rsp)

	movq	%r13, REGOFF_R13(%rsp)
	movq	%r14, REGOFF_R14(%rsp)
	movq	%r15, REGOFF_R15(%rsp)

	/* segment selectors, zero-extended to 64 bits through %rcx */
	xorl	%ecx, %ecx
	movw	%ds, %cx
	movq	%rcx, REGOFF_DS(%rsp)
	movw	%es, %cx
	movq	%rcx, REGOFF_ES(%rsp)
	movw	%fs, %cx
	movq	%rcx, REGOFF_FS(%rsp)
	movw	%gs, %cx
	movq	%rcx, REGOFF_GS(%rsp)

	movq	$0, REGOFF_TRAPNO(%rsp)

	movq	$0, REGOFF_ERR(%rsp)
	/* the recorded %rip is the address of vpanic itself */
	leaq	vpanic(%rip), %rcx
	movq	%rcx, REGOFF_RIP(%rsp)
	movw	%cs, %cx
	movzwq	%cx, %rcx
	movq	%rcx, REGOFF_CS(%rsp)
	movq	0x50(%rbx), %rcx		/* rflags as pushed at entry */
	movq	%rcx, REGOFF_RFL(%rsp)
	/* %rbx + 0x60 is the slot holding the return address (entry %rsp) */
	movq	%rbx, %rcx
	addq	$0x60, %rcx
	movq	%rcx, REGOFF_RSP(%rsp)
	movw	%ss, %cx
	movzwq	%cx, %rcx
	movq	%rcx, REGOFF_SS(%rsp)

	/*
	 * panicsys(format, alist, rp, on_panic_stack)
	 */
	movq	REGOFF_RDI(%rsp), %rdi		/* format */
	movq	REGOFF_RSI(%rsp), %rsi		/* alist */
	movq	%rsp, %rdx			/* struct regs */
	movl	%r11d, %ecx			/* on_panic_stack */
	call	panicsys
	/*
	 * Unwind: drop the regs structure, then pop the save area in the
	 * reverse order of the pushes at entry.
	 * NOTE(review): this only lines up with the save area when the
	 * stack was not switched to panic_stack above.
	 */
	addq	$REGSIZE, %rsp
	popq	%rdi
	popq	%rsi
	popq	%rdx
	popq	%rcx
	popq	%r8
	popq	%r9
	popq	%rax
	popq	%rbx
	popq	%r10
	popq	%r11
	popfq
	leave
	ret
	SET_SIZE(vpanic)
3240
/*
 * void dtrace_vpanic(const char *format, va_list alist) -- 64-bit variant.
 *
 * Builds the same register save area as vpanic(), fires the trigger via
 * dtrace_panic_trigger() instead of panic_trigger(), then joins vpanic()
 * at vpanic_common with the trigger result in %eax and the save-area
 * base in %rbx.  The push sequence below must stay identical to the one
 * in vpanic() because vpanic_common reads the saved values by offset.
 */
	ENTRY_NP(dtrace_vpanic)			/* Initial stack layout: */

	pushq	%rbp				/* | %rip | 	0x60	*/
	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
	pushfq					/* | rfl  |	0x50	*/
	pushq	%r11				/* | %r11 |	0x48	*/
	pushq	%r10				/* | %r10 |	0x40	*/
	pushq	%rbx				/* | %rbx |	0x38	*/
	pushq	%rax				/* | %rax |	0x30	*/
	pushq	%r9				/* | %r9  |	0x28	*/
	pushq	%r8				/* | %r8  |	0x20	*/
	pushq	%rcx				/* | %rcx |	0x18	*/
	pushq	%rdx				/* | %rdx |	0x10	*/
	pushq	%rsi				/* | %rsi |	0x8 alist */
	pushq	%rdi				/* | %rdi |	0x0 format */

	movq	%rsp, %rbx			/* %rbx = current %rsp */

	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
	call	dtrace_panic_trigger	/* %eax = dtrace_panic_trigger() */
	jmp	vpanic_common			/* continue in vpanic() */

	SET_SIZE(dtrace_vpanic)
3264
3265#elif defined(__i386)
3266
/*
 * void vpanic(const char *format, va_list alist) -- 32-bit variant.
 *
 * Saves %eax, %ebx, %ecx and %edx on the current stack, fires the
 * panic_quiesce trigger, optionally switches to panic_stack, builds a
 * regs structure (REGOFF_* offsets) and calls
 * panicsys(format, alist, rp, on_panic_stack).
 *
 * Register roles after the pushes:
 *	%ebx	base of the 0..20 save area laid out below
 *	%ebp	frame pointer; format is at 8(%ebp), alist at 12(%ebp)
 *	%eax	result of the panic trigger, kept live until it is pushed
 *		as the on_panic_stack argument
 *
 * vpanic_common is also entered by a jmp from dtrace_vpanic, which must
 * build exactly the same save area.
 */
	ENTRY_NP(vpanic)			/ Initial stack layout:

	pushl	%ebp				/ | %eip | 20
	movl	%esp, %ebp			/ | %ebp | 16
	pushl	%eax				/ | %eax | 12
	pushl	%ebx				/ | %ebx |  8
	pushl	%ecx				/ | %ecx |  4
	pushl	%edx				/ | %edx |  0

	movl	%esp, %ebx			/ %ebx = current stack pointer

	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
	pushl	%eax				/ push &panic_quiesce
	call	panic_trigger			/ %eax = panic_trigger()
	addl	$4, %esp			/ reset stack pointer

vpanic_common:
	cmpl	$0, %eax			/ if (%eax == 0)
	je	0f				/   goto 0f;

	/*
	 * If panic_trigger() was successful, we are the first to initiate a
	 * panic: we now switch to the reserved panic_stack before continuing.
	 */
	lea	panic_stack, %esp		/ %esp  = panic_stack
	addl	$PANICSTKSIZE, %esp		/ %esp += PANICSTKSIZE

0:	subl	$REGSIZE, %esp			/ allocate struct regs

	/*
	 * Now that we've got everything set up, store the register values as
	 * they were when we entered vpanic() to the designated location in
	 * the regs structure we allocated on the stack.
	 *
	 * %edi and %esi have not been modified since entry and are stored
	 * directly; the other general registers come from the save area
	 * addressed through %ebx.  The two preprocessor arms differ only
	 * in the mnemonic spelling accepted for segment-register moves.
	 */
#if !defined(__GNUC_AS__)
	movw	%gs, %edx
	movl	%edx, REGOFF_GS(%esp)
	movw	%fs, %edx
	movl	%edx, REGOFF_FS(%esp)
	movw	%es, %edx
	movl	%edx, REGOFF_ES(%esp)
	movw	%ds, %edx
	movl	%edx, REGOFF_DS(%esp)
#else	/* __GNUC_AS__ */
	mov	%gs, %edx
	mov	%edx, REGOFF_GS(%esp)
	mov	%fs, %edx
	mov	%edx, REGOFF_FS(%esp)
	mov	%es, %edx
	mov	%edx, REGOFF_ES(%esp)
	mov	%ds, %edx
	mov	%edx, REGOFF_DS(%esp)
#endif	/* __GNUC_AS__ */
	movl	%edi, REGOFF_EDI(%esp)
	movl	%esi, REGOFF_ESI(%esp)
	movl	16(%ebx), %ecx
	movl	%ecx, REGOFF_EBP(%esp)
	movl	%ebx, %ecx
	addl	$20, %ecx			/ %ebx + 20 = %esp at entry
	movl	%ecx, REGOFF_ESP(%esp)
	movl	8(%ebx), %ecx
	movl	%ecx, REGOFF_EBX(%esp)
	movl	0(%ebx), %ecx
	movl	%ecx, REGOFF_EDX(%esp)
	movl	4(%ebx), %ecx
	movl	%ecx, REGOFF_ECX(%esp)
	movl	12(%ebx), %ecx
	movl	%ecx, REGOFF_EAX(%esp)
	movl	$0, REGOFF_TRAPNO(%esp)
	movl	$0, REGOFF_ERR(%esp)
	lea	vpanic, %ecx			/ recorded %eip = address of vpanic
	movl	%ecx, REGOFF_EIP(%esp)
#if !defined(__GNUC_AS__)
	movw	%cs, %edx
#else	/* __GNUC_AS__ */
	mov	%cs, %edx
#endif	/* __GNUC_AS__ */
	movl	%edx, REGOFF_CS(%esp)
	/*
	 * The flags are sampled here rather than at entry, so the
	 * arithmetic status bits reflect the instructions executed above.
	 */
	pushfl
	popl	%ecx
#if defined(__xpv)
	/*
	 * Synthesize the PS_IE bit from the event mask bit
	 */
	CURTHREAD(%edx)
	KPREEMPT_DISABLE(%edx)
	EVENT_MASK_TO_IE(%edx, %ecx)
	CURTHREAD(%edx)
	KPREEMPT_ENABLE_NOKP(%edx)
#endif
	movl	%ecx, REGOFF_EFL(%esp)
	movl	$0, REGOFF_UESP(%esp)
#if !defined(__GNUC_AS__)
	movw	%ss, %edx
#else	/* __GNUC_AS__ */
	mov	%ss, %edx
#endif	/* __GNUC_AS__ */
	movl	%edx, REGOFF_SS(%esp)

	movl	%esp, %ecx			/ %ecx = &regs
	pushl	%eax				/ push on_panic_stack
	pushl	%ecx				/ push &regs
	movl	12(%ebp), %ecx			/ %ecx = alist
	pushl	%ecx				/ push alist
	movl	8(%ebp), %ecx			/ %ecx = format
	pushl	%ecx				/ push format
	call	panicsys			/ panicsys();
	addl	$16, %esp			/ pop arguments

	/*
	 * Unwind: drop the regs structure, then pop the save area in the
	 * reverse order of the pushes at entry.
	 */
	addl	$REGSIZE, %esp
	popl	%edx
	popl	%ecx
	popl	%ebx
	popl	%eax
	leave
	ret
	SET_SIZE(vpanic)
3384
/*
 * void dtrace_vpanic(const char *format, va_list alist) -- 32-bit variant.
 *
 * Builds the same register save area as vpanic(), fires the trigger via
 * dtrace_panic_trigger() instead of panic_trigger(), then joins vpanic()
 * at vpanic_common with the trigger result in %eax, the save-area base
 * in %ebx and the frame pointer in %ebp.  The push sequence must stay
 * identical to the one in vpanic().
 */
	ENTRY_NP(dtrace_vpanic)			/ Initial stack layout:

	pushl	%ebp				/ | %eip | 20
	movl	%esp, %ebp			/ | %ebp | 16
	pushl	%eax				/ | %eax | 12
	pushl	%ebx				/ | %ebx |  8
	pushl	%ecx				/ | %ecx |  4
	pushl	%edx				/ | %edx |  0

	movl	%esp, %ebx			/ %ebx = current stack pointer

	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
	pushl	%eax				/ push &panic_quiesce
	call	dtrace_panic_trigger		/ %eax = dtrace_panic_trigger()
	addl	$4, %esp			/ reset stack pointer
	jmp	vpanic_common			/ jump back to common code

	SET_SIZE(dtrace_vpanic)
3403
3404#endif	/* __i386 */
3405#endif	/* __lint */
3406
3407#if defined(__lint)
3408
3409void
3410hres_tick(void)
3411{}
3412
3413int64_t timedelta;
3414hrtime_t hres_last_tick;
3415volatile timestruc_t hrestime;
3416int64_t hrestime_adj;
3417volatile int hres_lock;
3418hrtime_t hrtime_base;
3419
3420#else	/* __lint */
3421
/*
 * Time-keeping variables updated by hres_tick().  The C-level types are
 * those given in the lint declarations: hrestime is a timestruc_t,
 * hrestime_adj and timedelta are int64_t, hres_last_tick and hrtime_base
 * are hrtime_t and hres_lock is an int.
 * NOTE(review): DGDEF3 arguments are presumably (name, size, alignment);
 * confirm against the macro definition in sys/asm_linkage.h.
 */
	/* two native-long words, initially zero */
	DGDEF3(hrestime, _MUL(2, CLONGSIZE), 8)
	.NWORD	0, 0

	DGDEF3(hrestime_adj, 8, 8)
	.long	0, 0

	DGDEF3(hres_last_tick, 8, 8)
	.long	0, 0

	DGDEF3(timedelta, 8, 8)
	.long	0, 0

	/* lock word taken and released inside hres_tick() */
	DGDEF3(hres_lock, 4, 8)
	.long	0

	/*
	 * initialized to a non zero value to make pc_gethrtime()
	 * work correctly even before clock is initialized
	 */
	DGDEF3(hrtime_base, 8, 8)
	.long	_MUL(NSEC_PER_CLOCK_TICK, 6), 0

	DGDEF3(adj_shift, 4, 4)
	.long	ADJ_SHIFT
3446
3447#if defined(__amd64)
3448
/*
 * void hres_tick(void) -- 64-bit variant.
 *
 * Samples the high-resolution time through *gethrtimef, takes the byte
 * lock at hres_lock, advances hrtime_base and hrestime.tv_nsec by the
 * interval since the previous tick, records the new hres_last_tick,
 * lets __adj_hrestime() apply any pending adjustment and finally
 * releases the lock by incrementing hres_lock.
 */
	ENTRY_NP(hres_tick)
	pushq	%rbp
	movq	%rsp, %rbp

	/*
	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
	 * At worst, performing this now instead of under CLOCK_LOCK may
	 * introduce some jitter in pc_gethrestime().
	 */
	call	*gethrtimef(%rip)
	movq	%rax, %r8		/* %r8 = current hrtime */

	/*
	 * Acquire the lock: swap a non-zero byte into the low byte of
	 * hres_lock until the value swapped out is zero.  While the lock
	 * is held elsewhere, spin on plain reads.
	 */
	movb	$-1, %dl
	leaq	hres_lock(%rip), %rax
1:
	xchgb	%dl, (%rax)
	testb	%dl, %dl
	jz	3f			/* got it */
2:
	pause
	cmpb	$0, (%rax)		/* possible to get lock? */
	jne	2b
	jmp	1b			/* yes, try again */
3:
	/*
	 * compute the interval since last time hres_tick was called
	 * and adjust hrtime_base and hrestime accordingly
	 * hrtime_base is an 8 byte value (in nsec), hrestime is
	 * a timestruc_t (sec, nsec)
	 */
	movq	%r8, %r11		/* %r11 = current hrtime (kept) */
	subq	hres_last_tick(%rip), %r8	/* %r8 = interval */
	addq	%r8, hrtime_base(%rip)	/* add interval to hrtime_base */
	addq	%r8, hrestime+8(%rip)	/* add interval to hrestime.tv_nsec */
	/*
	 * Now that we have CLOCK_LOCK, we can update hres_last_tick
	 */
	movq	%r11, hres_last_tick(%rip)

	call	__adj_hrestime

	/*
	 * release the hres_lock
	 */
	incl	hres_lock(%rip)
	leave
	ret
	SET_SIZE(hres_tick)
3499
3500#elif defined(__i386)
3501
/*
 * void hres_tick(void) -- 32-bit variant.
 *
 * Samples the high-resolution time through *gethrtimef, takes the byte
 * lock at hres_lock, advances hrtime_base and hrestime.tv_nsec by the
 * interval since the previous tick, records the new hres_last_tick,
 * applies the pending hrestime adjustment inline (the code from the
 * __adj_hrestime label onward), normalizes tv_nsec into whole seconds
 * and releases the lock by incrementing hres_lock.
 *
 * Register roles:
 *	%esi:%ebx	current hrtime (high:low) until hres_last_tick is
 *			written; both are callee-saved and restored on exit
 *	%edx:%ecx	adj (high:low) from .CL4 through .CL5
 */
	ENTRY_NP(hres_tick)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%esi
	pushl	%ebx

	/*
	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
	 * At worst, performing this now instead of under CLOCK_LOCK may
	 * introduce some jitter in pc_gethrestime().
	 */
	call	*gethrtimef
	movl	%eax, %ebx
	movl	%edx, %esi

	/*
	 * Acquire the lock: swap a non-zero byte into the low byte of
	 * hres_lock until the value swapped out is zero.
	 */
	movl	$hres_lock, %eax
	movl	$-1, %edx
.CL1:
	xchgb	%dl, (%eax)
	testb	%dl, %dl
	jz	.CL3			/ got it
.CL2:
	cmpb	$0, (%eax)		/ possible to get lock?
	pause
	jne	.CL2
	jmp	.CL1			/ yes, try again
.CL3:
	/*
	 * compute the interval since last time hres_tick was called
	 * and adjust hrtime_base and hrestime accordingly
	 * hrtime_base is an 8 byte value (in nsec), hrestime is
	 * timestruc_t (sec, nsec)
	 */

	lea	hres_last_tick, %eax

	movl	%ebx, %edx
	movl	%esi, %ecx

	subl 	(%eax), %edx		/ %ecx:%edx = now - hres_last_tick
	sbbl 	4(%eax), %ecx

	addl	%edx, hrtime_base	/ add interval to hrtime_base
	adcl	%ecx, hrtime_base+4

	addl 	%edx, hrestime+4	/ add interval to hrestime.tv_nsec

	/
	/ Now that we have CLOCK_LOCK, we can update hres_last_tick.
	/
	movl	%ebx, (%eax)
	movl	%esi,  4(%eax)

	/ get hrestime at this moment. used as base for pc_gethrestime
	/
	/ Apply adjustment, if any
	/
	/ #define HRES_ADJ	(NSEC_PER_CLOCK_TICK >> ADJ_SHIFT)
	/ (max_hres_adj)
	/
	/ void
	/ adj_hrestime()
	/ {
	/	long long adj;
	/
	/	if (hrestime_adj == 0)
	/		adj = 0;
	/	else if (hrestime_adj > 0) {
	/		if (hrestime_adj < HRES_ADJ)
	/			adj = hrestime_adj;
	/		else
	/			adj = HRES_ADJ;
	/	}
	/	else {
	/		if (hrestime_adj < -(HRES_ADJ))
	/			adj = -(HRES_ADJ);
	/		else
	/			adj = hrestime_adj;
	/	}
	/
	/	timedelta -= adj;
	/	hrestime_adj = timedelta;
	/	hrestime.tv_nsec += adj;
	/
	/	while (hrestime.tv_nsec >= NANOSEC) {
	/		one_sec++;
	/		hrestime.tv_sec++;
	/		hrestime.tv_nsec -= NANOSEC;
	/	}
	/ }
__adj_hrestime:
	movl	hrestime_adj, %esi	/ if (hrestime_adj == 0)
	movl	hrestime_adj+4, %edx
	andl	%esi, %esi
	jne	.CL4			/ no
	andl	%edx, %edx
	jne	.CL4			/ no
	subl	%ecx, %ecx		/ yes, adj = 0;
	subl	%edx, %edx
	jmp	.CL5
.CL4:
	/ Form %eax:%ecx = 0 - hrestime_adj; the sign of %eax is then the
	/ opposite of the sign of hrestime_adj.
	subl	%ecx, %ecx
	subl	%eax, %eax
	subl	%esi, %ecx
	sbbl	%edx, %eax
	andl	%eax, %eax		/ if (hrestime_adj > 0)
	jge	.CL6

	/ In the following comments, HRES_ADJ is used, while in the code
	/ max_hres_adj is used.
	/
	/ The test for "hrestime_adj < HRES_ADJ" is complicated because
	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
	/ on the logical equivalence of:
	/
	/	!(hrestime_adj < HRES_ADJ)
	/
	/ and the two step sequence:
	/
	/	(HRES_ADJ - lsw(hrestime_adj)) generates a Borrow/Carry
	/
	/ which computes whether or not the least significant 32-bits
	/ of hrestime_adj is greater than HRES_ADJ, followed by:
	/
	/	Previous Borrow/Carry + -1 + msw(hrestime_adj) generates a Carry
	/
	/ which generates a carry whenever step 1 is true or the most
	/ significant long of the longlong hrestime_adj is non-zero.

	movl	max_hres_adj, %ecx	/ hrestime_adj is positive
	subl	%esi, %ecx
	movl	%edx, %eax
	adcl	$-1, %eax
	jnc	.CL7
	movl	max_hres_adj, %ecx	/ adj = HRES_ADJ;
	subl	%edx, %edx
	jmp	.CL5

	/ The following computation is similar to the one above.
	/
	/ The test for "hrestime_adj < -(HRES_ADJ)" is complicated because
	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
	/ on the logical equivalence of:
	/
	/	(hrestime_adj > -HRES_ADJ)
	/
	/ and the two step sequence:
	/
	/	(HRES_ADJ + lsw(hrestime_adj)) generates a Carry
	/
	/ which means the least significant 32-bits of hrestime_adj is
	/ greater than -HRES_ADJ, followed by:
	/
	/	Previous Carry + 0 + msw(hrestime_adj) generates a Carry
	/
	/ which generates a carry only when step 1 is true and the most
	/ significant long of the longlong hrestime_adj is -1.

.CL6:					/ hrestime_adj is negative
	movl	%esi, %ecx
	addl	max_hres_adj, %ecx
	movl	%edx, %eax
	adcl	$0, %eax
	jc	.CL7
	xor	%ecx, %ecx
	subl	max_hres_adj, %ecx	/ adj = -(HRES_ADJ);
	movl	$-1, %edx
	jmp	.CL5
.CL7:
	/ %edx still holds msw(hrestime_adj), so %edx:%ecx = hrestime_adj
	movl	%esi, %ecx		/ adj = hrestime_adj;
.CL5:
	movl	timedelta, %esi
	subl	%ecx, %esi
	movl	timedelta+4, %eax
	sbbl	%edx, %eax
	movl	%esi, timedelta
	movl	%eax, timedelta+4	/ timedelta -= adj;
	movl	%esi, hrestime_adj
	movl	%eax, hrestime_adj+4	/ hrestime_adj = timedelta;
	addl	hrestime+4, %ecx	/ %ecx = hrestime.tv_nsec + lsw(adj)

	movl	%ecx, %eax		/ eax = tv_nsec
1:
	cmpl	$NANOSEC, %eax		/ if ((unsigned long)tv_nsec >= NANOSEC)
	jb	.CL8			/ no
	incl	one_sec			/ yes,  one_sec++;
	incl	hrestime		/ hrestime.tv_sec++;
	addl	$-NANOSEC, %eax		/ tv_nsec -= NANOSEC
	jmp	1b			/ check for more seconds

.CL8:
	movl	%eax, hrestime+4	/ store final into hrestime.tv_nsec
	incl	hres_lock		/ release the hres_lock

	popl	%ebx
	popl	%esi
	leave
	ret
	SET_SIZE(hres_tick)
3702
3703#endif	/* __i386 */
3704#endif	/* __lint */
3705
3706/*
3707 * void prefetch_smap_w(void *)
3708 *
3709 * Prefetch ahead within a linear list of smap structures.
3710 * Not implemented for ia32.  Stub for compatibility.
3711 */
3712
3713#if defined(__lint)
3714
3715/*ARGSUSED*/
3716void prefetch_smap_w(void *smp)
3717{}
3718
3719#else	/* __lint */
3720
/*
 * Stub: ignores its argument and returns immediately.
 */
	ENTRY(prefetch_smap_w)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(prefetch_smap_w)
3725
3726#endif	/* __lint */
3727
/*
 * void prefetch_page_r(void *)
 *
 * Intended to issue prefetch instructions for a page_t.
 * Not implemented for ia32: the routine below is an empty stub that
 * simply returns.
 */
3732#if defined(__lint)
3733
3734/*ARGSUSED*/
3735void
3736prefetch_page_r(void *pp)
3737{}
3738
3739#else	/* __lint */
3740
/*
 * Stub: ignores its argument and returns immediately; no prefetch
 * instruction is issued here.
 */
	ENTRY(prefetch_page_r)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(prefetch_page_r)
3745
3746#endif	/* __lint */
3747
3748#if defined(__lint)
3749
3750/*ARGSUSED*/
3751int
3752bcmp(const void *s1, const void *s2, size_t count)
3753{ return (0); }
3754
3755#else   /* __lint */
3756
3757#if defined(__amd64)
3758
/*
 * int bcmp(const void *s1, const void *s2, size_t count) -- 64-bit variant.
 *
 * Returns 0 when memcmp() reports the buffers equal and 1 otherwise.
 * The argument registers are handed to memcmp() untouched.  DEBUG
 * kernels panic if either pointer is below postbootkernelbase.
 */
	ENTRY(bcmp)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	movq	postbootkernelbase(%rip), %r11
	cmpq	%r11, %rsi		/* s2 below the kernel base? */
	jb	0f
	cmpq	%r11, %rdi		/* s1 at or above it as well? */
	jae	1f
0:	leaq	.bcmp_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
1:
#endif	/* DEBUG */
	call	memcmp
	testl	%eax, %eax
	setne	%al			/* collapse the result to 0 or 1 */
	movzbl	%al, %eax
	leave
	ret
	SET_SIZE(bcmp)
3780
3781#elif defined(__i386)
3782
/*
 * int bcmp(const void *s1, const void *s2, size_t count) -- 32-bit variant.
 *
 * Returns 0 when the two buffers hold identical bytes (or are the same
 * buffer) and 1 otherwise.  Compares four bytes at a time while at
 * least four remain, then finishes byte by byte.  Because only
 * equal/not-equal is reported, a mismatching word goes straight to the
 * not-equal exit; there is no need to locate the differing byte.
 * DEBUG kernels panic if either pointer is below postbootkernelbase.
 *
 * Register roles:
 *	%eax	cursor into s1
 *	%ecx	cursor into s2
 *	%edi	bytes remaining (callee-saved, preserved on the stack)
 *	%edx	scratch for the word/byte being compared
 */
#define	ARG_S1		8
#define	ARG_S2		12
#define	ARG_LENGTH	16

	ENTRY(bcmp)
	pushl	%ebp
	movl	%esp, %ebp	/ create new stack frame
#ifdef DEBUG
	movl    postbootkernelbase, %eax
	cmpl    %eax, ARG_S1(%ebp)
	jb	0f
	cmpl    %eax, ARG_S2(%ebp)
	jnb	1f
0:	pushl   $.bcmp_panic_msg
	call    panic
1:
#endif	/* DEBUG */

	pushl	%edi		/ save register variable
	movl	ARG_S1(%ebp), %eax	/ %eax = address of string 1
	movl	ARG_S2(%ebp), %ecx	/ %ecx = address of string 2
	cmpl	%eax, %ecx	/ if the same string
	je	.equal		/ goto .equal
	movl	ARG_LENGTH(%ebp), %edi	/ %edi = length in bytes
	cmpl	$4, %edi	/ if %edi < 4
	jb	.byte_check	/ goto .byte_check
	.align	4
.word_loop:
	movl	(%ecx), %edx	/ move 1 word from (%ecx) to %edx
	leal	-4(%edi), %edi	/ %edi -= 4
	cmpl	(%eax), %edx	/ compare 1 word from (%eax) with %edx
	jne	.not_equal	/ any differing word means not equal
	leal	4(%ecx), %ecx	/ %ecx += 4 (next word)
	leal	4(%eax), %eax	/ %eax += 4 (next word)
	cmpl	$4, %edi	/ if %edi >= 4
	jae	.word_loop	/ goto .word_loop
.byte_check:
	testl	%edi, %edi	/ if nothing is left
	je	.equal		/ goto .equal
	.align	4
.byte_loop:
	movb	(%ecx),	%dl	/ move 1 byte from (%ecx) to %dl
	cmpb	%dl, (%eax)	/ compare %dl with 1 byte from (%eax)
	jne	.not_equal	/ if not equal, goto .not_equal
	incl	%ecx		/ %ecx++ (next byte)
	incl	%eax		/ %eax++ (next byte)
	decl	%edi		/ %edi--
	jnz	.byte_loop	/ if not zero, goto .byte_loop
.equal:
	xorl	%eax, %eax	/ %eax = 0
	popl	%edi		/ restore register variable
	leave			/ restore old stack frame
	ret			/ return (0)
	.align	4
.not_equal:
	movl	$1, %eax	/ %eax = 1
	popl	%edi		/ restore register variable
	leave			/ restore old stack frame
	ret			/ return (1)
	SET_SIZE(bcmp)
3846
3847#endif	/* __i386 */
3848
3849#ifdef DEBUG
3850	.text
3851.bcmp_panic_msg:
3852	.string "bcmp: arguments below kernelbase"
3853#endif	/* DEBUG */
3854
3855#endif	/* __lint */
3856
3857#if defined(__lint)
3858
3859uint_t
3860bsrw_insn(uint16_t mask)
3861{
3862	uint_t index = sizeof (mask) * NBBY - 1;
3863
3864	while ((mask & (1 << index)) == 0)
3865		index--;
3866	return (index);
3867}
3868
3869#else	/* __lint */
3870
3871#if defined(__amd64)
3872
/*
 * uint_t bsrw_insn(uint16_t mask) -- 64-bit variant.
 *
 * Returns the index of the most significant set bit of the 16-bit mask
 * in %di.  %eax is cleared first so the upper bits of the result are
 * zero after the 16-bit bsrw writes %ax.
 * NOTE(review): for mask == 0 the result relies on bsrw leaving its
 * destination untouched -- confirm against the processor manuals.
 */
	ENTRY_NP(bsrw_insn)
	xorl	%eax, %eax
	bsrw	%di, %ax
	ret
	SET_SIZE(bsrw_insn)
3878
3879#elif defined(__i386)
3880
/*
 * uint_t bsrw_insn(uint16_t mask) -- 32-bit variant.
 *
 * Returns the index of the most significant set bit of the 16-bit mask
 * passed at 4(%esp).  %eax is cleared first so the upper bits of the
 * result are zero after the 16-bit bsrw writes %ax.
 * NOTE(review): for mask == 0 the result relies on bsrw leaving its
 * destination untouched -- confirm against the processor manuals.
 */
	ENTRY_NP(bsrw_insn)
	movw	4(%esp), %cx
	xorl	%eax, %eax
	bsrw	%cx, %ax
	ret
	SET_SIZE(bsrw_insn)
3887
3888#endif	/* __i386 */
3889#endif	/* __lint */
3890
3891#if defined(__lint)
3892
3893uint_t
3894atomic_btr32(uint32_t *pending, uint_t pil)
3895{
3896	return (*pending &= ~(1 << pil));
3897}
3898
3899#else	/* __lint */
3900
3901#if defined(__i386)
3902
/*
 * uint_t atomic_btr32(uint32_t *pending, uint_t pil) -- 32-bit only.
 *
 * Atomically clears bit `pil' of *pending and returns the previous
 * state of that bit (0 or 1).
 */
	ENTRY_NP(atomic_btr32)
	movl	8(%esp), %eax		/* %eax = pil (bit index) */
	movl	4(%esp), %edx		/* %edx = pending */
	lock
	btrl	%eax, (%edx)		/* CF = old bit; bit is cleared */
	sbbl	%eax, %eax		/* %eax = -CF */
	negl	%eax			/* %eax = CF */
	ret
	SET_SIZE(atomic_btr32)
3912
3913#endif	/* __i386 */
3914#endif	/* __lint */
3915
3916#if defined(__lint)
3917
3918/*ARGSUSED*/
3919void
3920switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t), uint_t arg1,
3921	    uint_t arg2)
3922{}
3923
3924#else	/* __lint */
3925
3926#if defined(__amd64)
3927
/*
 * void switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t),
 *     uint_t arg1, uint_t arg2) -- 64-bit variant.
 *
 * Calls func(arg1, arg2) with the stack pointer set to newsp.  The old
 * stack is remembered through %rbp, so `leave' returns to it afterwards.
 */
	ENTRY_NP(switch_sp_and_call)
	pushq	%rbp
	movq	%rsp, %rbp		/* frame pointer keeps the old stack */
	movq	%rsi, %r11		/* %r11 = func */
	movq	%rdi, %rsp		/* run on newsp from here on */
	movq	%rdx, %rdi		/* first argument for func */
	movq	%rcx, %rsi		/* second argument for func */
	call	*%r11
	leave				/* back onto the original stack */
	ret
	SET_SIZE(switch_sp_and_call)
3939
3940#elif defined(__i386)
3941
/*
 * void switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t),
 *     uint_t arg1, uint_t arg2) -- 32-bit variant.
 *
 * Calls func(arg1, arg2) with the stack pointer set to newsp.  The old
 * stack is remembered through %ebp, so `leave' discards the pushed
 * arguments and returns to it in one step.
 */
	ENTRY_NP(switch_sp_and_call)
	pushl	%ebp
	movl	%esp, %ebp		/* frame pointer keeps the old stack */
	movl	12(%ebp), %eax		/* %eax = func */
	movl	16(%ebp), %ecx		/* %ecx = arg1 */
	movl	20(%ebp), %edx		/* %edx = arg2 */
	movl	8(%ebp), %esp		/* run on newsp from here on */
	pushl	%edx			/* push func arg 2 */
	pushl	%ecx			/* push func arg 1 */
	call	*%eax			/* call function */
	leave				/* back onto the original stack */
	ret
	SET_SIZE(switch_sp_and_call)
3953
3954#endif	/* __i386 */
3955#endif	/* __lint */
3956
3957#if defined(__lint)
3958
3959void
3960kmdb_enter(void)
3961{}
3962
3963#else	/* __lint */
3964
3965#if defined(__amd64)
3966
/*
 * void kmdb_enter(void) -- 64-bit variant.
 *
 * Raises the T_DBGENTR software interrupt with interrupts disabled and
 * puts the interrupt state back afterwards.  The value returned by
 * intr_clear() is expected to still be in %rax after the `int'
 * instruction, since it is handed to intr_restore().
 */
	ENTRY_NP(kmdb_enter)
	pushq	%rbp
	movq	%rsp, %rbp

	/*
	 * Save flags, do a 'cli' then return the saved flags
	 */
	call	intr_clear

	int	$T_DBGENTR

	/*
	 * Restore the saved flags
	 */
	movq	%rax, %rdi
	call	intr_restore

	leave
	ret
	SET_SIZE(kmdb_enter)
3987
3988#elif defined(__i386)
3989
/*
 * void kmdb_enter(void) -- 32-bit variant.
 *
 * Raises the T_DBGENTR software interrupt with interrupts disabled and
 * puts the interrupt state back afterwards.  The value returned by
 * intr_clear() is expected to still be in %eax after the `int'
 * instruction, since it is pushed as the argument to intr_restore().
 */
	ENTRY_NP(kmdb_enter)
	pushl	%ebp
	movl	%esp, %ebp

	/*
	 * Save flags, do a 'cli' then return the saved flags
	 */
	call	intr_clear

	int	$T_DBGENTR

	/*
	 * Restore the saved flags
	 */
	pushl	%eax
	call	intr_restore
	addl	$4, %esp

	leave
	ret
	SET_SIZE(kmdb_enter)
4011
4012#endif	/* __i386 */
4013#endif	/* __lint */
4014
4015#if defined(__lint)
4016
4017void
4018return_instr(void)
4019{}
4020
4021#else	/* __lint */
4022
/*
 * void return_instr(void)
 *
 * Does nothing except return to its caller.
 */
	ENTRY_NP(return_instr)
	rep;	ret	/* use 2 byte instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(return_instr)
4027
4028#endif	/* __lint */
4029
4030#if defined(__lint)
4031
4032ulong_t
4033getflags(void)
4034{
4035	return (0);
4036}
4037
4038#else	/* __lint */
4039
4040#if defined(__amd64)
4041
/*
 * ulong_t getflags(void) -- 64-bit variant.
 *
 * Returns the current flags register in %rax.  Under __xpv the PS_IE
 * bit of the returned value is replaced: it is cleared and then set
 * again only if XEN_TEST_UPCALL_MASK reports the mask as clear.
 * %rdi and %r11 are used as scratch on that path.
 */
	ENTRY(getflags)
	pushfq
	popq	%rax
#if defined(__xpv)
	CURTHREAD(%rdi)
	KPREEMPT_DISABLE(%rdi)
	/*
	 * Synthesize the PS_IE bit from the event mask bit
	 */
	CURVCPU(%r11)
	andq    $_BITNOT(PS_IE), %rax
	XEN_TEST_UPCALL_MASK(%r11)
	jnz	1f			/* masked: leave PS_IE clear */
	orq	$PS_IE, %rax
1:
	KPREEMPT_ENABLE_NOKP(%rdi)
#endif
	ret
	SET_SIZE(getflags)
4061
4062#elif defined(__i386)
4063
/*
 * ulong_t getflags(void) -- 32-bit variant.
 *
 * Returns the current flags register in %eax.  Under __xpv the PS_IE
 * bit of the returned value is replaced: it is cleared and then set
 * again only if XEN_TEST_UPCALL_MASK reports the mask as clear.
 * %ecx and %edx are used as scratch on that path.
 */
	ENTRY(getflags)
	pushfl
	popl	%eax
#if defined(__xpv)
	CURTHREAD(%ecx)
	KPREEMPT_DISABLE(%ecx)
	/*
	 * Synthesize the PS_IE bit from the event mask bit
	 */
	CURVCPU(%edx)
	andl    $_BITNOT(PS_IE), %eax
	XEN_TEST_UPCALL_MASK(%edx)
	jnz	1f			/* masked: leave PS_IE clear */
	orl	$PS_IE, %eax
1:
	KPREEMPT_ENABLE_NOKP(%ecx)
#endif
	ret
	SET_SIZE(getflags)
4083
4084#endif	/* __i386 */
4085
4086#endif	/* __lint */
4087
4088#if defined(__lint)
4089
4090ftrace_icookie_t
4091ftrace_interrupt_disable(void)
4092{ return (0); }
4093
4094#else   /* __lint */
4095
4096#if defined(__amd64)
4097
/*
 * ftrace_icookie_t ftrace_interrupt_disable(void) -- 64-bit variant.
 *
 * Returns the flags register as it was on entry (the cookie later given
 * to ftrace_interrupt_enable()) and then invokes the CLI macro.
 * NOTE(review): %rdx is presumably a scratch register for CLI --
 * confirm against the macro definition.
 */
	ENTRY(ftrace_interrupt_disable)
	pushfq
	popq	%rax
	CLI(%rdx)
	ret
	SET_SIZE(ftrace_interrupt_disable)
4104
4105#elif defined(__i386)
4106
/*
 * ftrace_icookie_t ftrace_interrupt_disable(void) -- 32-bit variant.
 *
 * Returns the flags register as it was on entry (the cookie later given
 * to ftrace_interrupt_enable()) and then invokes the CLI macro.
 * NOTE(review): %edx is presumably a scratch register for CLI --
 * confirm against the macro definition.
 */
	ENTRY(ftrace_interrupt_disable)
	pushfl
	popl	%eax
	CLI(%edx)
	ret
	SET_SIZE(ftrace_interrupt_disable)
4113
4114#endif	/* __i386 */
4115#endif	/* __lint */
4116
4117#if defined(__lint)
4118
4119/*ARGSUSED*/
4120void
4121ftrace_interrupt_enable(ftrace_icookie_t cookie)
4122{}
4123
4124#else	/* __lint */
4125
4126#if defined(__amd64)
4127
/*
 * void ftrace_interrupt_enable(ftrace_icookie_t cookie) -- 64-bit variant.
 *
 * Loads the flags register from the cookie in %rdi by pushing it and
 * popping it into the flags.
 */
	ENTRY(ftrace_interrupt_enable)
	pushq	%rdi
	popfq
	ret
	SET_SIZE(ftrace_interrupt_enable)
4133
4134#elif defined(__i386)
4135
/*
 * void ftrace_interrupt_enable(ftrace_icookie_t cookie) -- 32-bit variant.
 *
 * Loads the flags register from the cookie passed at 4(%esp).
 */
	ENTRY(ftrace_interrupt_enable)
	pushl	4(%esp)			/* copy the cookie to the stack top */
	popfl				/* ... and pop it into the flags */
	ret
	SET_SIZE(ftrace_interrupt_enable)
4142
4143#endif	/* __i386 */
4144#endif	/* __lint */
4145